1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43 First of all, experiments showed that the transformation is not
44 always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and their common dominators,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
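/* As an illustration of the insertion criterion above (block names are
   purely for exposition): if block A branches to blocks B and C, block B
   contains two divisions by the same SSA name and block C contains none,
   then no division post-dominates A, and inserting the reciprocal in A
   would waste a division on the path through C.  With N <= 2 the
   reciprocal is instead inserted in B, which both divisions
   post-dominate.  */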
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
119
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
126
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
130
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
134
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
138
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
142
143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
146
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
151
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
156
157 /* Construct a struct occurrence for basic block BB, whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
161 {
162 bb->aux = this;
163 }
164
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
167 {
168 bb->aux = nullptr;
169 }
170
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
173
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
176 };
177
178 static struct
179 {
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
182
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
186
187 static struct
188 {
189 /* Number of cexpi calls inserted. */
190 int inserted;
191
192 /* Number of conversions removed. */
193 int conv_removed;
194
195 } sincos_stats;
196
197 static struct
198 {
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
201
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
204
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
207
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
210 } widen_mul_stats;
211
212 /* The instance of "struct occurrence" representing the highest
213 interesting block in the dominator tree. */
214 static struct occurrence *occ_head;
215
216 /* Allocation pool for getting instances of "struct occurrence". */
217 static object_allocator<occurrence> *occ_pool;
218
219 void* occurrence::operator new (size_t n)
220 {
221 gcc_assert (n == sizeof(occurrence));
222 return occ_pool->allocate_raw ();
223 }
224
225 void occurrence::operator delete (void *occ, size_t n)
226 {
227 gcc_assert (n == sizeof(occurrence));
228 occ_pool->remove_raw (occ);
229 }
230
231 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
232 list of "struct occurrence"s, one per basic block, having IDOM as
233 their common dominator.
234
235 We try to insert NEW_OCC as deep as possible in the tree, and we also
236 insert any other block that is a common dominator for BB and one
237 block already in the tree. */
238
239 static void
240 insert_bb (struct occurrence *new_occ, basic_block idom,
241 struct occurrence **p_head)
242 {
243 struct occurrence *occ, **p_occ;
244
245 for (p_occ = p_head; (occ = *p_occ) != NULL; )
246 {
247 basic_block bb = new_occ->bb, occ_bb = occ->bb;
248 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
249 if (dom == bb)
250 {
251 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
252 from its list. */
253 *p_occ = occ->next;
254 occ->next = new_occ->children;
255 new_occ->children = occ;
256
257 /* Try the next block (it may as well be dominated by BB). */
258 }
259
260 else if (dom == occ_bb)
261 {
262 /* OCC_BB dominates BB. Tail recurse to look deeper. */
263 insert_bb (new_occ, dom, &occ->children);
264 return;
265 }
266
267 else if (dom != idom)
268 {
269 gcc_assert (!dom->aux);
270
271 /* There is a dominator between IDOM and BB, add it and make
272 two children out of NEW_OCC and OCC. First, remove OCC from
273 its list. */
274 *p_occ = occ->next;
275 new_occ->next = occ;
276 occ->next = NULL;
277
278 /* None of the previous blocks has DOM as a dominator: if we tail
279 recursed, we would reexamine them uselessly. Just switch BB with
280 DOM, and go on looking for blocks dominated by DOM. */
281 new_occ = new occurrence (dom, new_occ);
282 }
283
284 else
285 {
286 /* Nothing special, go on with the next element. */
287 p_occ = &occ->next;
288 }
289 }
290
291 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
292 new_occ->next = *p_head;
293 *p_head = new_occ;
294 }
295
296 /* Register that we found a division in BB.
297 IMPORTANCE is a measure of how much weighting to give
298 that division. Use IMPORTANCE = 2 to register a single
299 division. If the division is going to be found multiple
300 times use 1 (as it is with squares). */
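/* For example (see execute_cse_reciprocals_1 and insert_reciprocals): a
   plain division by DEF is registered once with IMPORTANCE 2, whereas a
   division by DEF * DEF is reached through both uses of DEF in the
   multiplication and is therefore registered twice with IMPORTANCE 1.
   Either way the block's count grows by 2, and insert_reciprocals halves
   the accumulated count before comparing it against the threshold.  */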
301
302 static inline void
303 register_division_in (basic_block bb, int importance)
304 {
305 struct occurrence *occ;
306
307 occ = (struct occurrence *) bb->aux;
308 if (!occ)
309 {
310 occ = new occurrence (bb, NULL);
311 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
312 }
313
314 occ->bb_has_division = true;
315 occ->num_divisions += importance;
316 }
317
318
319 /* Compute the number of divisions that postdominate each block in OCC and
320 its children. */
321
322 static void
323 compute_merit (struct occurrence *occ)
324 {
325 struct occurrence *occ_child;
326 basic_block dom = occ->bb;
327
328 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
329 {
330 basic_block bb;
331 if (occ_child->children)
332 compute_merit (occ_child);
333
334 if (flag_exceptions)
335 bb = single_noncomplex_succ (dom);
336 else
337 bb = dom;
338
339 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
340 occ->num_divisions += occ_child->num_divisions;
341 }
342 }
343
344
345 /* Return whether USE_STMT is a floating-point division by DEF. */
346 static inline bool
347 is_division_by (gimple *use_stmt, tree def)
348 {
349 return is_gimple_assign (use_stmt)
350 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
351 && gimple_assign_rhs2 (use_stmt) == def
352 /* Do not recognize x / x as valid division, as we are getting
353 confused later by replacing all immediate uses of x in such
354 a stmt. */
355 && gimple_assign_rhs1 (use_stmt) != def
356 && !stmt_can_throw_internal (cfun, use_stmt);
357 }
358
359 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
360 static inline bool
361 is_mult_by (gimple *use_stmt, tree def, tree a)
362 {
363 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
364 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
365 {
366 tree op0 = gimple_assign_rhs1 (use_stmt);
367 tree op1 = gimple_assign_rhs2 (use_stmt);
368
369 return (op0 == def && op1 == a)
370 || (op0 == a && op1 == def);
371 }
372 return 0;
373 }
374
375 /* Return whether USE_STMT is DEF * DEF. */
376 static inline bool
377 is_square_of (gimple *use_stmt, tree def)
378 {
379 return is_mult_by (use_stmt, def, def);
380 }
381
382 /* Return whether USE_STMT is a floating-point division by
383 DEF * DEF. */
384 static inline bool
385 is_division_by_square (gimple *use_stmt, tree def)
386 {
387 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
388 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
389 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
390 && !stmt_can_throw_internal (cfun, use_stmt))
391 {
392 tree denominator = gimple_assign_rhs2 (use_stmt);
393 if (TREE_CODE (denominator) == SSA_NAME)
394 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
395 }
396 return 0;
397 }
398
399 /* Walk the subset of the dominator tree rooted at OCC, setting the
400 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
401 the given basic block. The field may be left NULL, of course,
402 if it is not possible or profitable to do the optimization.
403
404 DEF_BSI is an iterator pointing at the statement defining DEF.
405 If RECIP_DEF is set, a dominator already has a computation that can
406 be used.
407
408 If should_insert_square_recip is set, then this also inserts
409 the square of the reciprocal immediately after the definition
410 of the reciprocal. */
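/* For example, when both the reciprocal and its square are needed, the
   statements created below look like

     reciptmp = 1.0 / DEF;
     powmult_reciptmp = reciptmp * reciptmp;

   inserted according to one of the three cases handled in the body.  */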
411
412 static void
413 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
414 tree def, tree recip_def, tree square_recip_def,
415 int should_insert_square_recip, int threshold)
416 {
417 tree type;
418 gassign *new_stmt, *new_square_stmt;
419 gimple_stmt_iterator gsi;
420 struct occurrence *occ_child;
421
422 if (!recip_def
423 && (occ->bb_has_division || !flag_trapping_math)
424 /* Divide by two as all divisions are counted twice in
425 the costing loop. */
426 && occ->num_divisions / 2 >= threshold)
427 {
428 /* Make a variable with the replacement and substitute it. */
429 type = TREE_TYPE (def);
430 recip_def = create_tmp_reg (type, "reciptmp");
431 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
432 build_one_cst (type), def);
433
434 if (should_insert_square_recip)
435 {
436 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
437 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
438 recip_def, recip_def);
439 }
440
441 if (occ->bb_has_division)
442 {
443 /* Case 1: insert before an existing division. */
444 gsi = gsi_after_labels (occ->bb);
445 while (!gsi_end_p (gsi)
446 && (!is_division_by (gsi_stmt (gsi), def))
447 && (!is_division_by_square (gsi_stmt (gsi), def)))
448 gsi_next (&gsi);
449
450 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
451 if (should_insert_square_recip)
452 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
453 }
454 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
455 {
456 /* Case 2: insert right after the definition. Note that this will
457 never happen if the definition statement can throw, because in
458 that case the sole successor of the statement's basic block will
459 dominate all the uses as well. */
460 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
461 if (should_insert_square_recip)
462 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
463 }
464 else
465 {
466 /* Case 3: insert in a basic block not containing defs/uses. */
467 gsi = gsi_after_labels (occ->bb);
468 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
469 if (should_insert_square_recip)
470 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
471 }
472
473 reciprocal_stats.rdivs_inserted++;
474
475 occ->recip_def_stmt = new_stmt;
476 }
477
478 occ->recip_def = recip_def;
479 occ->square_recip_def = square_recip_def;
480 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
481 insert_reciprocals (def_gsi, occ_child, def, recip_def,
482 square_recip_def, should_insert_square_recip,
483 threshold);
484 }
485
486 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
487 Take as argument the use for (x * x). */
488 static inline void
489 replace_reciprocal_squares (use_operand_p use_p)
490 {
491 gimple *use_stmt = USE_STMT (use_p);
492 basic_block bb = gimple_bb (use_stmt);
493 struct occurrence *occ = (struct occurrence *) bb->aux;
494
495 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
496 && occ->recip_def)
497 {
498 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
499 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
500 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
501 SET_USE (use_p, occ->square_recip_def);
502 fold_stmt_inplace (&gsi);
503 update_stmt (use_stmt);
504 }
505 }
506
507
508 /* Replace the division at USE_P with a multiplication by the reciprocal, if
509 possible. */
510
511 static inline void
512 replace_reciprocal (use_operand_p use_p)
513 {
514 gimple *use_stmt = USE_STMT (use_p);
515 basic_block bb = gimple_bb (use_stmt);
516 struct occurrence *occ = (struct occurrence *) bb->aux;
517
518 if (optimize_bb_for_speed_p (bb)
519 && occ->recip_def && use_stmt != occ->recip_def_stmt)
520 {
521 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
522 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
523 SET_USE (use_p, occ->recip_def);
524 fold_stmt_inplace (&gsi);
525 update_stmt (use_stmt);
526 }
527 }
528
529
530 /* Free OCC and return one more "struct occurrence" to be freed. */
531
532 static struct occurrence *
533 free_bb (struct occurrence *occ)
534 {
535 struct occurrence *child, *next;
536
537 /* First get the two pointers hanging off OCC. */
538 next = occ->next;
539 child = occ->children;
540 delete occ;
541
542 /* Now ensure that we don't recurse unless it is necessary. */
543 if (!child)
544 return next;
545 else
546 {
547 while (next)
548 next = free_bb (next);
549
550 return child;
551 }
552 }
553
554 /* Transform sequences like
555 t = sqrt (a)
556 x = 1.0 / t;
557 r1 = x * x;
558 r2 = a * x;
559 into:
560 t = sqrt (a)
561 r1 = 1.0 / a;
562 r2 = t;
563 x = r1 * r2;
564 depending on the uses of x, r1, r2. This removes one multiplication and
565 allows the sqrt and division operations to execute in parallel.
566 DEF_GSI is the gsi of the initial division by sqrt that defines
567 DEF (x in the example above). */
568
569 static void
570 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
571 {
572 gimple *use_stmt;
573 imm_use_iterator use_iter;
574 gimple *stmt = gsi_stmt (*def_gsi);
575 tree x = def;
576 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
577 tree div_rhs1 = gimple_assign_rhs1 (stmt);
578
579 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
580 || TREE_CODE (div_rhs1) != REAL_CST
581 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
582 return;
583
584 gcall *sqrt_stmt
585 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
586
587 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
588 return;
589
590 switch (gimple_call_combined_fn (sqrt_stmt))
591 {
592 CASE_CFN_SQRT:
593 CASE_CFN_SQRT_FN:
594 break;
595
596 default:
597 return;
598 }
599 tree a = gimple_call_arg (sqrt_stmt, 0);
600
601 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
602
603 /* Statements that use x in x * x. */
604 auto_vec<gimple *> sqr_stmts;
605 /* Statements that use x in a * x. */
606 auto_vec<gimple *> mult_stmts;
607 bool has_other_use = false;
608 bool mult_on_main_path = false;
609
610 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
611 {
612 if (is_gimple_debug (use_stmt))
613 continue;
614 if (is_square_of (use_stmt, x))
615 {
616 sqr_stmts.safe_push (use_stmt);
617 if (gimple_bb (use_stmt) == gimple_bb (stmt))
618 mult_on_main_path = true;
619 }
620 else if (is_mult_by (use_stmt, x, a))
621 {
622 mult_stmts.safe_push (use_stmt);
623 if (gimple_bb (use_stmt) == gimple_bb (stmt))
624 mult_on_main_path = true;
625 }
626 else
627 has_other_use = true;
628 }
629
630 /* In the x * x and a * x cases we just rewire stmt operands or
631 remove multiplications. In the has_other_use case we introduce
632 a multiplication so make sure we don't introduce a multiplication
633 on a path where there was none. */
634 if (has_other_use && !mult_on_main_path)
635 return;
636
637 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
638 return;
639
640 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
641 to be able to compose it from the sqr and mult cases. */
642 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
643 return;
644
645 if (dump_file)
646 {
647 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
648 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
649 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
650 fprintf (dump_file, "\n");
651 }
652
653 bool delete_div = !has_other_use;
654 tree sqr_ssa_name = NULL_TREE;
655 if (!sqr_stmts.is_empty ())
656 {
657 /* r1 = x * x. Transform the original
658 x = 1.0 / t
659 into
660 tmp1 = 1.0 / a
661 r1 = tmp1. */
662
663 sqr_ssa_name
664 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
665
666 if (dump_file)
667 {
668 fprintf (dump_file, "Replacing original division\n");
669 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
670 fprintf (dump_file, "with new division\n");
671 }
672 stmt
673 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
674 gimple_assign_rhs1 (stmt), a);
675 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
676 gsi_remove (def_gsi, true);
677 *def_gsi = gsi_for_stmt (stmt);
678 fold_stmt_inplace (def_gsi);
679 update_stmt (stmt);
680
681 if (dump_file)
682 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
683
684 delete_div = false;
685 gimple *sqr_stmt;
686 unsigned int i;
687 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
688 {
689 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
690 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
691 update_stmt (sqr_stmt);
692 }
693 }
694 if (!mult_stmts.is_empty ())
695 {
696 /* r2 = a * x. Transform this into:
697 r2 = t (The original sqrt (a)). */
698 unsigned int i;
699 gimple *mult_stmt = NULL;
700 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
701 {
702 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
703
704 if (dump_file)
705 {
706 fprintf (dump_file, "Replacing squaring multiplication\n");
707 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
708 fprintf (dump_file, "with assignment\n");
709 }
710 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
711 fold_stmt_inplace (&gsi2);
712 update_stmt (mult_stmt);
713 if (dump_file)
714 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
715 }
716 }
717
718 if (has_other_use)
719 {
720 /* Using the two temporaries tmp1, tmp2 from above
721 the original x is now:
722 x = tmp1 * tmp2. */
723 gcc_assert (orig_sqrt_ssa_name);
724 gcc_assert (sqr_ssa_name);
725
726 gimple *new_stmt
727 = gimple_build_assign (x, MULT_EXPR,
728 orig_sqrt_ssa_name, sqr_ssa_name);
729 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
730 update_stmt (stmt);
731 }
732 else if (delete_div)
733 {
734 /* Remove the original division. */
735 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
736 gsi_remove (&gsi2, true);
737 release_defs (stmt);
738 }
739 else
740 release_ssa_name (x);
741 }
742
743 /* Look for floating-point divisions among DEF's uses, and try to
744 replace them by multiplications with the reciprocal. Add
745 as many statements computing the reciprocal as needed.
746
747 DEF must be a GIMPLE register of a floating-point type. */
748
749 static void
750 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
751 {
752 use_operand_p use_p, square_use_p;
753 imm_use_iterator use_iter, square_use_iter;
754 tree square_def;
755 struct occurrence *occ;
756 int count = 0;
757 int threshold;
758 int square_recip_count = 0;
759 int sqrt_recip_count = 0;
760
761 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
762 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
763
764 /* If DEF is a square (x * x), count the number of divisions by x.
765 If there are more divisions by x than by (DEF * DEF), prefer to optimize
766 the reciprocal of x instead of DEF. This improves cases like:
767 def = x * x
768 t0 = a / def
769 t1 = b / def
770 t2 = c / x
771 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
772 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
773
774 if (is_gimple_assign (def_stmt)
775 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
776 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
777 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
778 {
779 tree op0 = gimple_assign_rhs1 (def_stmt);
780
781 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
782 {
783 gimple *use_stmt = USE_STMT (use_p);
784 if (is_division_by (use_stmt, op0))
785 sqrt_recip_count++;
786 }
787 }
788
789 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
790 {
791 gimple *use_stmt = USE_STMT (use_p);
792 if (is_division_by (use_stmt, def))
793 {
794 register_division_in (gimple_bb (use_stmt), 2);
795 count++;
796 }
797
798 if (is_square_of (use_stmt, def))
799 {
800 square_def = gimple_assign_lhs (use_stmt);
801 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
802 {
803 gimple *square_use_stmt = USE_STMT (square_use_p);
804 if (is_division_by (square_use_stmt, square_def))
805 {
806 /* This is executed twice for each division by a square. */
807 register_division_in (gimple_bb (square_use_stmt), 1);
808 square_recip_count++;
809 }
810 }
811 }
812 }
813
814 /* Square reciprocals were counted twice above. */
815 square_recip_count /= 2;
816
817 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
818 if (sqrt_recip_count > square_recip_count)
819 goto out;
820
821 /* Do the expensive part only if we can hope to optimize something. */
822 if (count + square_recip_count >= threshold && count >= 1)
823 {
824 gimple *use_stmt;
825 for (occ = occ_head; occ; occ = occ->next)
826 {
827 compute_merit (occ);
828 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
829 square_recip_count, threshold);
830 }
831
832 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
833 {
834 if (is_division_by (use_stmt, def))
835 {
836 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
837 replace_reciprocal (use_p);
838 }
839 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
840 {
841 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
842 {
843 /* Find all uses of the square that are divisions and
844 * replace them by multiplications with the inverse. */
845 imm_use_iterator square_iterator;
846 gimple *powmult_use_stmt = USE_STMT (use_p);
847 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
848
849 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
850 square_iterator, powmult_def_name)
851 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
852 {
853 gimple *powmult_use_stmt = USE_STMT (square_use_p);
854 if (is_division_by (powmult_use_stmt, powmult_def_name))
855 replace_reciprocal_squares (square_use_p);
856 }
857 }
858 }
859 }
860 }
861
862 out:
863 for (occ = occ_head; occ; )
864 occ = free_bb (occ);
865
866 occ_head = NULL;
867 }
868
869 /* Return an internal function that implements the reciprocal of CALL,
870 or IFN_LAST if there is no such function that the target supports. */
871
872 internal_fn
873 internal_fn_reciprocal (gcall *call)
874 {
875 internal_fn ifn;
876
877 switch (gimple_call_combined_fn (call))
878 {
879 CASE_CFN_SQRT:
880 CASE_CFN_SQRT_FN:
881 ifn = IFN_RSQRT;
882 break;
883
884 default:
885 return IFN_LAST;
886 }
887
888 tree_pair types = direct_internal_fn_types (ifn, call);
889 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
890 return IFN_LAST;
891
892 return ifn;
893 }
894
895 /* Go through all the floating-point SSA_NAMEs, and call
896 execute_cse_reciprocals_1 on each of them. */
897 namespace {
898
899 const pass_data pass_data_cse_reciprocals =
900 {
901 GIMPLE_PASS, /* type */
902 "recip", /* name */
903 OPTGROUP_NONE, /* optinfo_flags */
904 TV_TREE_RECIP, /* tv_id */
905 PROP_ssa, /* properties_required */
906 0, /* properties_provided */
907 0, /* properties_destroyed */
908 0, /* todo_flags_start */
909 TODO_update_ssa, /* todo_flags_finish */
910 };
911
912 class pass_cse_reciprocals : public gimple_opt_pass
913 {
914 public:
915 pass_cse_reciprocals (gcc::context *ctxt)
916 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
917 {}
918
919 /* opt_pass methods: */
920 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
921 virtual unsigned int execute (function *);
922
923 }; // class pass_cse_reciprocals
924
925 unsigned int
926 pass_cse_reciprocals::execute (function *fun)
927 {
928 basic_block bb;
929 tree arg;
930
931 occ_pool = new object_allocator<occurrence> ("dominators for recip");
932
933 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
934 calculate_dominance_info (CDI_DOMINATORS);
935 calculate_dominance_info (CDI_POST_DOMINATORS);
936
937 if (flag_checking)
938 FOR_EACH_BB_FN (bb, fun)
939 gcc_assert (!bb->aux);
940
941 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
942 if (FLOAT_TYPE_P (TREE_TYPE (arg))
943 && is_gimple_reg (arg))
944 {
945 tree name = ssa_default_def (fun, arg);
946 if (name)
947 execute_cse_reciprocals_1 (NULL, name);
948 }
949
950 FOR_EACH_BB_FN (bb, fun)
951 {
952 tree def;
953
954 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
955 gsi_next (&gsi))
956 {
957 gphi *phi = gsi.phi ();
958 def = PHI_RESULT (phi);
959 if (! virtual_operand_p (def)
960 && FLOAT_TYPE_P (TREE_TYPE (def)))
961 execute_cse_reciprocals_1 (NULL, def);
962 }
963
964 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
965 gsi_next (&gsi))
966 {
967 gimple *stmt = gsi_stmt (gsi);
968
969 if (gimple_has_lhs (stmt)
970 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
971 && FLOAT_TYPE_P (TREE_TYPE (def))
972 && TREE_CODE (def) == SSA_NAME)
973 {
974 execute_cse_reciprocals_1 (&gsi, def);
975 stmt = gsi_stmt (gsi);
976 if (flag_unsafe_math_optimizations
977 && is_gimple_assign (stmt)
978 && gimple_assign_lhs (stmt) == def
979 && !stmt_can_throw_internal (cfun, stmt)
980 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
981 optimize_recip_sqrt (&gsi, def);
982 }
983 }
984
985 if (optimize_bb_for_size_p (bb))
986 continue;
987
988 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
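/* E.g. (SSA names purely illustrative):
     t = sqrt (b);
     x = a / t;
   becomes, when the target provides a reciprocal square root,
     t = IFN_RSQRT (b);   (or a target-specific builtin)
     x = a * t;  */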
989 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
990 gsi_next (&gsi))
991 {
992 gimple *stmt = gsi_stmt (gsi);
993
994 if (is_gimple_assign (stmt)
995 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
996 {
997 tree arg1 = gimple_assign_rhs2 (stmt);
998 gimple *stmt1;
999
1000 if (TREE_CODE (arg1) != SSA_NAME)
1001 continue;
1002
1003 stmt1 = SSA_NAME_DEF_STMT (arg1);
1004
1005 if (is_gimple_call (stmt1)
1006 && gimple_call_lhs (stmt1))
1007 {
1008 bool fail;
1009 imm_use_iterator ui;
1010 use_operand_p use_p;
1011 tree fndecl = NULL_TREE;
1012
1013 gcall *call = as_a <gcall *> (stmt1);
1014 internal_fn ifn = internal_fn_reciprocal (call);
1015 if (ifn == IFN_LAST)
1016 {
1017 fndecl = gimple_call_fndecl (call);
1018 if (!fndecl
1019 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1020 continue;
1021 fndecl = targetm.builtin_reciprocal (fndecl);
1022 if (!fndecl)
1023 continue;
1024 }
1025
1026 /* Check that all uses of the SSA name are divisions,
1027 otherwise replacing the defining statement will do
1028 the wrong thing. */
1029 fail = false;
1030 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1031 {
1032 gimple *stmt2 = USE_STMT (use_p);
1033 if (is_gimple_debug (stmt2))
1034 continue;
1035 if (!is_gimple_assign (stmt2)
1036 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1037 || gimple_assign_rhs1 (stmt2) == arg1
1038 || gimple_assign_rhs2 (stmt2) != arg1)
1039 {
1040 fail = true;
1041 break;
1042 }
1043 }
1044 if (fail)
1045 continue;
1046
1047 gimple_replace_ssa_lhs (call, arg1);
1048 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1049 {
1050 auto_vec<tree, 4> args;
1051 for (unsigned int i = 0;
1052 i < gimple_call_num_args (call); i++)
1053 args.safe_push (gimple_call_arg (call, i));
1054 gcall *stmt2;
1055 if (ifn == IFN_LAST)
1056 stmt2 = gimple_build_call_vec (fndecl, args);
1057 else
1058 stmt2 = gimple_build_call_internal_vec (ifn, args);
1059 gimple_call_set_lhs (stmt2, arg1);
1060 gimple_move_vops (stmt2, call);
1061 gimple_call_set_nothrow (stmt2,
1062 gimple_call_nothrow_p (call));
1063 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1064 gsi_replace (&gsi2, stmt2, true);
1065 }
1066 else
1067 {
1068 if (ifn == IFN_LAST)
1069 gimple_call_set_fndecl (call, fndecl);
1070 else
1071 gimple_call_set_internal_fn (call, ifn);
1072 update_stmt (call);
1073 }
1074 reciprocal_stats.rfuncs_inserted++;
1075
1076 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1077 {
1078 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1079 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1080 fold_stmt_inplace (&gsi);
1081 update_stmt (stmt);
1082 }
1083 }
1084 }
1085 }
1086 }
1087
1088 statistics_counter_event (fun, "reciprocal divs inserted",
1089 reciprocal_stats.rdivs_inserted);
1090 statistics_counter_event (fun, "reciprocal functions inserted",
1091 reciprocal_stats.rfuncs_inserted);
1092
1093 free_dominance_info (CDI_DOMINATORS);
1094 free_dominance_info (CDI_POST_DOMINATORS);
1095 delete occ_pool;
1096 return 0;
1097 }
1098
1099 } // anon namespace
1100
1101 gimple_opt_pass *
1102 make_pass_cse_reciprocals (gcc::context *ctxt)
1103 {
1104 return new pass_cse_reciprocals (ctxt);
1105 }
1106
1107 /* If NAME is the result of a type conversion, look for other
1108 equivalent dominating or dominated conversions, and replace all
1109 uses with the earliest dominating name, removing the redundant
1110 conversions. Return the prevailing name. */
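/* For instance (SSA names purely illustrative), given

     a_1 = (float) x_2;
     ...
     b_3 = (float) x_2;
     use (b_3);

   where the definition of a_1 dominates that of b_3, the second
   conversion is removed and the use is rewritten to use (a_1).  */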
1111
1112 static tree
1113 execute_cse_conv_1 (tree name)
1114 {
1115 if (SSA_NAME_IS_DEFAULT_DEF (name)
1116 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1117 return name;
1118
1119 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1120
1121 if (!gimple_assign_cast_p (def_stmt))
1122 return name;
1123
1124 tree src = gimple_assign_rhs1 (def_stmt);
1125
1126 if (TREE_CODE (src) != SSA_NAME)
1127 return name;
1128
1129 imm_use_iterator use_iter;
1130 gimple *use_stmt;
1131
1132 /* Find the earliest dominating def. */
1133 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1134 {
1135 if (use_stmt == def_stmt
1136 || !gimple_assign_cast_p (use_stmt))
1137 continue;
1138
1139 tree lhs = gimple_assign_lhs (use_stmt);
1140
1141 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1142 || (gimple_assign_rhs1 (use_stmt)
1143 != gimple_assign_rhs1 (def_stmt))
1144 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1145 continue;
1146
1147 bool use_dominates;
1148 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1149 {
1150 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1151 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1152 gsi_next (&gsi);
1153 use_dominates = !gsi_end_p (gsi);
1154 }
1155 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1156 gimple_bb (def_stmt)))
1157 use_dominates = false;
1158 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1159 gimple_bb (use_stmt)))
1160 use_dominates = true;
1161 else
1162 continue;
1163
1164 if (use_dominates)
1165 {
1166 std::swap (name, lhs);
1167 std::swap (def_stmt, use_stmt);
1168 }
1169 }
1170
1171 /* Now go through all uses of SRC again, replacing the equivalent
1172 dominated conversions. We may replace defs that were not
1173 dominated by the then-prevailing defs when we first visited
1174 them. */
1175 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1176 {
1177 if (use_stmt == def_stmt
1178 || !gimple_assign_cast_p (use_stmt))
1179 continue;
1180
1181 tree lhs = gimple_assign_lhs (use_stmt);
1182
1183 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1184 || (gimple_assign_rhs1 (use_stmt)
1185 != gimple_assign_rhs1 (def_stmt))
1186 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1187 continue;
1188
1189 if (gimple_bb (def_stmt) == gimple_bb (use_stmt)
1190 || dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1191 gimple_bb (def_stmt)))
1192 {
1193 sincos_stats.conv_removed++;
1194
1195 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1196 replace_uses_by (lhs, name);
1197 gsi_remove (&gsi, true);
1198 }
1199 }
1200
1201 return name;
1202 }
1203
1204 /* Records an occurrence at statement USE_STMT in the vector of statements
1205 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1206 is not yet initialized. Returns true if the occurrence was pushed on
1207 the vector. Adjusts *TOP_BB to be the basic block dominating all
1208 statements in the vector. */
1209
1210 static bool
1211 maybe_record_sincos (vec<gimple *> *stmts,
1212 basic_block *top_bb, gimple *use_stmt)
1213 {
1214 basic_block use_bb = gimple_bb (use_stmt);
1215 if (*top_bb
1216 && (*top_bb == use_bb
1217 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1218 stmts->safe_push (use_stmt);
1219 else if (!*top_bb
1220 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1221 {
1222 stmts->safe_push (use_stmt);
1223 *top_bb = use_bb;
1224 }
1225 else
1226 return false;
1227
1228 return true;
1229 }
1230
1231 /* Look for sin, cos and cexpi calls with the same argument NAME and
1232 create a single call to cexpi CSEing the result in this case.
1233 We first walk over all immediate uses of the argument collecting
1234 statements that we can CSE in a vector and in a second pass replace
1235 the statement rhs with a REALPART or IMAGPART expression on the
1236 result of the cexpi call we insert before the use statement that
1237 dominates all other candidates. */
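/* For example (SSA names purely illustrative),

     s_1 = sin (x_3);
     c_2 = cos (x_3);

   becomes

     sincostmp_4 = cexpi (x_3);
     s_1 = IMAGPART_EXPR <sincostmp_4>;
     c_2 = REALPART_EXPR <sincostmp_4>;  */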
1238
1239 static bool
1240 execute_cse_sincos_1 (tree name)
1241 {
1242 gimple_stmt_iterator gsi;
1243 imm_use_iterator use_iter;
1244 tree fndecl, res, type = NULL_TREE;
1245 gimple *def_stmt, *use_stmt, *stmt;
1246 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1247 auto_vec<gimple *> stmts;
1248 basic_block top_bb = NULL;
1249 int i;
1250 bool cfg_changed = false;
1251
1252 name = execute_cse_conv_1 (name);
1253
1254 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1255 {
1256 if (gimple_code (use_stmt) != GIMPLE_CALL
1257 || !gimple_call_lhs (use_stmt))
1258 continue;
1259
1260 switch (gimple_call_combined_fn (use_stmt))
1261 {
1262 CASE_CFN_COS:
1263 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1264 break;
1265
1266 CASE_CFN_SIN:
1267 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1268 break;
1269
1270 CASE_CFN_CEXPI:
1271 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1272 break;
1273
1274 default:;
1275 continue;
1276 }
1277
1278 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1279 if (!type)
1280 {
1281 type = t;
1282 t = TREE_TYPE (name);
1283 }
1284 /* This checks that NAME has the right type in the first round,
1285 and, in subsequent rounds, that the built_in type is the same
1286 type, or a compatible type. */
1287 if (type != t && !types_compatible_p (type, t))
1288 return false;
1289 }
1290 if (seen_cos + seen_sin + seen_cexpi <= 1)
1291 return false;
1292
1293 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1294 the name def statement. */
1295 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1296 if (!fndecl)
1297 return false;
1298 stmt = gimple_build_call (fndecl, 1, name);
1299 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1300 gimple_call_set_lhs (stmt, res);
1301
1302 def_stmt = SSA_NAME_DEF_STMT (name);
1303 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1304 && gimple_code (def_stmt) != GIMPLE_PHI
1305 && gimple_bb (def_stmt) == top_bb)
1306 {
1307 gsi = gsi_for_stmt (def_stmt);
1308 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1309 }
1310 else
1311 {
1312 gsi = gsi_after_labels (top_bb);
1313 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1314 }
1315 sincos_stats.inserted++;
1316
1317 /* And adjust the recorded old call sites. */
1318 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1319 {
1320 tree rhs = NULL;
1321
1322 switch (gimple_call_combined_fn (use_stmt))
1323 {
1324 CASE_CFN_COS:
1325 rhs = fold_build1 (REALPART_EXPR, type, res);
1326 break;
1327
1328 CASE_CFN_SIN:
1329 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1330 break;
1331
1332 CASE_CFN_CEXPI:
1333 rhs = res;
1334 break;
1335
1336 default:;
1337 gcc_unreachable ();
1338 }
1339
1340 /* Replace call with a copy. */
1341 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1342
1343 gsi = gsi_for_stmt (use_stmt);
1344 gsi_replace (&gsi, stmt, true);
1345 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1346 cfg_changed = true;
1347 }
1348
1349 return cfg_changed;
1350 }
1351
1352 /* To evaluate powi(x,n), the floating point value x raised to the
1353 constant integer exponent n, we use a hybrid algorithm that
1354 combines the "window method" with look-up tables. For an
1355 introduction to exponentiation algorithms and "addition chains",
1356 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1357 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1358 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1359 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
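/* As a worked example of the table-driven part below: powi (x, 13) is
   expanded, using powi_table, into the addition chain

     x2 = x * x;  x3 = x * x2;  x5 = x2 * x3;
     x10 = x5 * x5;  x13 = x3 * x10;

   i.e. five multiplications (the names are only illustrative; the code
   below creates "powmult" temporaries).  Exponents of POWI_TABLE_SIZE or
   more are first broken down with the window method.  */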
1360
1361 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1362 multiplications to inline before calling the system library's pow
1363 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1364 so this default never requires calling pow, powf or powl. */
1365
1366 #ifndef POWI_MAX_MULTS
1367 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1368 #endif
1369
1370 /* The size of the "optimal power tree" lookup table. All
1371 exponents less than this value are simply looked up in the
1372 powi_table below. This threshold is also used to size the
1373 cache of pseudo registers that hold intermediate results. */
1374 #define POWI_TABLE_SIZE 256
1375
1376 /* The size, in bits of the window, used in the "window method"
1377 exponentiation algorithm. This is equivalent to a radix of
1378 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1379 #define POWI_WINDOW_SIZE 3
1380
1381 /* The following table is an efficient representation of an
1382 "optimal power tree". For each value, i, the corresponding
1383 value, j, in the table states that an optimal evaluation
1384 sequence for calculating pow(x,i) can be found by evaluating
1385 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1386 100 integers is given in Knuth's "Seminumerical algorithms". */
1387
1388 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1389 {
1390 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1391 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1392 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1393 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1394 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1395 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1396 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1397 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1398 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1399 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1400 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1401 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1402 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1403 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1404 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1405 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1406 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1407 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1408 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1409 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1410 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1411 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1412 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1413 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1414 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1415 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1416 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1417 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1418 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1419 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1420 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1421 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1422 };
1423
1424
1425 /* Return the number of multiplications required to calculate
1426 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1427 subroutine of powi_cost. CACHE is an array indicating
1428 which exponents have already been calculated. */
1429
1430 static int
1431 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1432 {
1433 /* If we've already calculated this exponent, then this evaluation
1434 doesn't require any additional multiplications. */
1435 if (cache[n])
1436 return 0;
1437
1438 cache[n] = true;
1439 return powi_lookup_cost (n - powi_table[n], cache)
1440 + powi_lookup_cost (powi_table[n], cache) + 1;
1441 }
1442
1443 /* Return the number of multiplications required to calculate
1444 powi(x,n) for an arbitrary x, given the exponent N. This
1445 function needs to be kept in sync with powi_as_mults below. */
1446
1447 static int
1448 powi_cost (HOST_WIDE_INT n)
1449 {
1450 bool cache[POWI_TABLE_SIZE];
1451 unsigned HOST_WIDE_INT digit;
1452 unsigned HOST_WIDE_INT val;
1453 int result;
1454
1455 if (n == 0)
1456 return 0;
1457
1458 /* Ignore the reciprocal when calculating the cost. */
1459 val = (n < 0) ? -n : n;
1460
1461 /* Initialize the exponent cache. */
1462 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1463 cache[1] = true;
1464
1465 result = 0;
1466
1467 while (val >= POWI_TABLE_SIZE)
1468 {
1469 if (val & 1)
1470 {
1471 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1472 result += powi_lookup_cost (digit, cache)
1473 + POWI_WINDOW_SIZE + 1;
1474 val >>= POWI_WINDOW_SIZE;
1475 }
1476 else
1477 {
1478 val >>= 1;
1479 result++;
1480 }
1481 }
1482
1483 return result + powi_lookup_cost (val, cache);
1484 }
1485
1486 /* Recursive subroutine of powi_as_mults. This function takes the
1487 array, CACHE, of already calculated exponents and an exponent N and
1488 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1489
1490 static tree
1491 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1492 HOST_WIDE_INT n, tree *cache)
1493 {
1494 tree op0, op1, ssa_target;
1495 unsigned HOST_WIDE_INT digit;
1496 gassign *mult_stmt;
1497
1498 if (n < POWI_TABLE_SIZE && cache[n])
1499 return cache[n];
1500
1501 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1502
1503 if (n < POWI_TABLE_SIZE)
1504 {
1505 cache[n] = ssa_target;
1506 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1507 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1508 }
1509 else if (n & 1)
1510 {
1511 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1512 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1513 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1514 }
1515 else
1516 {
1517 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1518 op1 = op0;
1519 }
1520
1521 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1522 gimple_set_location (mult_stmt, loc);
1523 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1524
1525 return ssa_target;
1526 }
1527
1528 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1529 This function needs to be kept in sync with powi_cost above. */
1530
1531 tree
1532 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1533 tree arg0, HOST_WIDE_INT n)
1534 {
1535 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1536 gassign *div_stmt;
1537 tree target;
1538
1539 if (n == 0)
1540 return build_one_cst (type);
1541
1542 memset (cache, 0, sizeof (cache));
1543 cache[1] = arg0;
1544
1545 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1546 if (n >= 0)
1547 return result;
1548
1549 /* If the original exponent was negative, reciprocate the result. */
1550 target = make_temp_ssa_name (type, NULL, "powmult");
1551 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1552 build_real (type, dconst1), result);
1553 gimple_set_location (div_stmt, loc);
1554 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1555
1556 return target;
1557 }
1558
1559 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1560 location info LOC. If the arguments are appropriate, create an
1561 equivalent sequence of statements prior to GSI using an optimal
1562 number of multiplications, and return an expression holding the
1563 result. */
1564
1565 static tree
1566 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1567 tree arg0, HOST_WIDE_INT n)
1568 {
1569 /* Avoid largest negative number. */
1570 if (n != -n
1571 && ((n >= -1 && n <= 2)
1572 || (optimize_function_for_speed_p (cfun)
1573 && powi_cost (n) <= POWI_MAX_MULTS)))
1574 return powi_as_mults (gsi, loc, arg0, n);
1575
1576 return NULL_TREE;
1577 }
1578
1579 /* Build a gimple call statement that calls FN with argument ARG.
1580 Set the lhs of the call statement to a fresh SSA name. Insert the
1581 statement prior to GSI's current position, and return the fresh
1582 SSA name. */
1583
1584 static tree
1585 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1586 tree fn, tree arg)
1587 {
1588 gcall *call_stmt;
1589 tree ssa_target;
1590
1591 call_stmt = gimple_build_call (fn, 1, arg);
1592 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1593 gimple_set_lhs (call_stmt, ssa_target);
1594 gimple_set_location (call_stmt, loc);
1595 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1596
1597 return ssa_target;
1598 }
1599
1600 /* Build a gimple binary operation with the given CODE and arguments
1601 ARG0, ARG1, assigning the result to a new SSA name for variable
1602 TARGET. Insert the statement prior to GSI's current position, and
1603 return the fresh SSA name.  */
1604
1605 static tree
1606 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1607 const char *name, enum tree_code code,
1608 tree arg0, tree arg1)
1609 {
1610 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1611 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1612 gimple_set_location (stmt, loc);
1613 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1614 return result;
1615 }
1616
1617 /* Build a gimple reference operation with the given CODE and argument
1618 ARG, assigning the result to a new SSA name of TYPE with NAME.
1619 Insert the statement prior to GSI's current position, and return
1620 the fresh SSA name. */
1621
1622 static inline tree
1623 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1624 const char *name, enum tree_code code, tree arg0)
1625 {
1626 tree result = make_temp_ssa_name (type, NULL, name);
1627 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1628 gimple_set_location (stmt, loc);
1629 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1630 return result;
1631 }
1632
1633 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1634 prior to GSI's current position, and return the fresh SSA name. */
1635
1636 static tree
1637 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1638 tree type, tree val)
1639 {
1640 tree result = make_ssa_name (type);
1641 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1642 gimple_set_location (stmt, loc);
1643 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1644 return result;
1645 }
1646
1647 struct pow_synth_sqrt_info
1648 {
1649 bool *factors;
1650 unsigned int deepest;
1651 unsigned int num_mults;
1652 };
1653
1654 /* Return true iff the real value C can be represented as a
1655 sum of powers of 0.5 up to N. That is:
1656 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1657 Record in INFO the various parameters of the synthesis algorithm such
1658 as the factors a[i], the maximum 0.5 power and the number of
1659 multiplications that will be required. */
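/* For example, 0.625 = 0.5 + 0.125 = 0.5**1 + 0.5**3, so for C == 0.625
   and N >= 3 this returns true with factors == {1, 0, 1}, deepest == 3
   and num_mults == 1: the single multiplication needed to combine
   sqrt (x) with sqrt (sqrt (sqrt (x))), matching the POW (x, 0.625)
   example further below.  */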
1660
1661 bool
1662 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1663 struct pow_synth_sqrt_info *info)
1664 {
1665 REAL_VALUE_TYPE factor = dconsthalf;
1666 REAL_VALUE_TYPE remainder = c;
1667
1668 info->deepest = 0;
1669 info->num_mults = 0;
1670 memset (info->factors, 0, n * sizeof (bool));
1671
1672 for (unsigned i = 0; i < n; i++)
1673 {
1674 REAL_VALUE_TYPE res;
1675
1676 /* If something inexact happened bail out now. */
1677 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1678 return false;
1679
1680 /* We have hit zero. The number is representable as a sum
1681 of powers of 0.5. */
1682 if (real_equal (&res, &dconst0))
1683 {
1684 info->factors[i] = true;
1685 info->deepest = i + 1;
1686 return true;
1687 }
1688 else if (!REAL_VALUE_NEGATIVE (res))
1689 {
1690 remainder = res;
1691 info->factors[i] = true;
1692 info->num_mults++;
1693 }
1694 else
1695 info->factors[i] = false;
1696
1697 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1698 }
1699 return false;
1700 }
1701
1702 /* Return the tree corresponding to FN being applied
1703 to ARG N times at GSI and LOC.
1704 Look up previous results from CACHE if need be.
1705 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1706
1707 static tree
1708 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1709 tree fn, location_t loc, tree *cache)
1710 {
1711 tree res = cache[n];
1712 if (!res)
1713 {
1714 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1715 res = build_and_insert_call (gsi, loc, fn, prev);
1716 cache[n] = res;
1717 }
1718
1719 return res;
1720 }
1721
1722 /* Print to STREAM the repeated application of function FNAME to ARG
1723 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1724 "foo (foo (x))". */
1725
1726 static void
1727 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1728 unsigned int n)
1729 {
1730 if (n == 0)
1731 fprintf (stream, "%s", arg);
1732 else
1733 {
1734 fprintf (stream, "%s (", fname);
1735 print_nested_fn (stream, fname, arg, n - 1);
1736 fprintf (stream, ")");
1737 }
1738 }
1739
1740 /* Print to STREAM the fractional sequence of sqrt chains
1741 applied to ARG, described by INFO. Used for the dump file. */
1742
1743 static void
1744 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1745 struct pow_synth_sqrt_info *info)
1746 {
1747 for (unsigned int i = 0; i < info->deepest; i++)
1748 {
1749 bool is_set = info->factors[i];
1750 if (is_set)
1751 {
1752 print_nested_fn (stream, "sqrt", arg, i + 1);
1753 if (i != info->deepest - 1)
1754 fprintf (stream, " * ");
1755 }
1756 }
1757 }
1758
1759 /* Print to STREAM a representation of raising ARG to an integer
1760 power N. Used for the dump file. */
1761
1762 static void
1763 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1764 {
1765 if (n > 1)
1766 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1767 else if (n == 1)
1768 fprintf (stream, "%s", arg);
1769 }
1770
1771 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1772 square roots. Place at GSI and LOC. Limit the maximum depth
1773 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1774 result of the expanded sequence or NULL_TREE if the expansion failed.
1775
1776 This routine assumes that ARG1 is a real number with a fractional part
1777 (the integer exponent case will have been handled earlier in
1778 gimple_expand_builtin_pow).
1779
1780 For ARG1 > 0.0:
1781 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1782 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1783 FRAC_PART == ARG1 - WHOLE_PART:
1784 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1785 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1786 if it can be expressed as such, that is if FRAC_PART satisfies:
1787 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1788 where integer a[i] is either 0 or 1.
1789
1790 Example:
1791 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1792 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1793
1794 For ARG1 < 0.0 there are two approaches:
1795 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1796 is calculated as above.
1797
1798 Example:
1799 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1800 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1801
1802 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1803 FRAC_PART := ARG1 - WHOLE_PART
1804 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1805 Example:
1806 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1807 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1808
1809 For ARG1 < 0.0 we choose between (A) and (B) depending on
1810 how many multiplications we'd have to do.
1811 So, for the example in (B): POW (x, -5.875), if we were to
1812 follow algorithm (A) we would produce:
1813 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1814 which contains more multiplications than approach (B).
1815
1816 Hopefully, this approach will eliminate potentially expensive POW library
1817 calls when unsafe floating point math is enabled and allow the compiler to
1818 further optimise the multiplies, square roots and divides produced by this
1819 function. */
1820
1821 static tree
1822 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1823 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1824 {
1825 tree type = TREE_TYPE (arg0);
1826 machine_mode mode = TYPE_MODE (type);
1827 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1828 bool one_over = true;
1829
1830 if (!sqrtfn)
1831 return NULL_TREE;
1832
1833 if (TREE_CODE (arg1) != REAL_CST)
1834 return NULL_TREE;
1835
1836 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1837
1838 gcc_assert (max_depth > 0);
1839 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1840
1841 struct pow_synth_sqrt_info synth_info;
1842 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1843 synth_info.deepest = 0;
1844 synth_info.num_mults = 0;
1845
1846 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1847 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1848
1849 /* The whole and fractional parts of exp. */
1850 REAL_VALUE_TYPE whole_part;
1851 REAL_VALUE_TYPE frac_part;
1852
1853 real_floor (&whole_part, mode, &exp);
1854 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1855
1856
1857 REAL_VALUE_TYPE ceil_whole = dconst0;
1858 REAL_VALUE_TYPE ceil_fract = dconst0;
1859
1860 if (neg_exp)
1861 {
1862 real_ceil (&ceil_whole, mode, &exp);
1863 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1864 }
1865
1866 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1867 return NULL_TREE;
1868
1869 /* Check whether it's more profitable to not use 1.0 / ... */
1870 if (neg_exp)
1871 {
1872 struct pow_synth_sqrt_info alt_synth_info;
1873 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1874 alt_synth_info.deepest = 0;
1875 alt_synth_info.num_mults = 0;
1876
1877 if (representable_as_half_series_p (ceil_fract, max_depth,
1878 &alt_synth_info)
1879 && alt_synth_info.deepest <= synth_info.deepest
1880 && alt_synth_info.num_mults < synth_info.num_mults)
1881 {
1882 whole_part = ceil_whole;
1883 frac_part = ceil_fract;
1884 synth_info.deepest = alt_synth_info.deepest;
1885 synth_info.num_mults = alt_synth_info.num_mults;
1886 memcpy (synth_info.factors, alt_synth_info.factors,
1887 (max_depth + 1) * sizeof (bool));
1888 one_over = false;
1889 }
1890 }
1891
1892 HOST_WIDE_INT n = real_to_integer (&whole_part);
1893 REAL_VALUE_TYPE cint;
1894 real_from_integer (&cint, VOIDmode, n, SIGNED);
1895
1896 if (!real_identical (&whole_part, &cint))
1897 return NULL_TREE;
1898
1899 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1900 return NULL_TREE;
1901
1902 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1903
1904 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1905
1906 /* Calculate the integer part of the exponent. */
1907 if (n > 1)
1908 {
1909 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1910 if (!integer_res)
1911 return NULL_TREE;
1912 }
1913
1914 if (dump_file)
1915 {
1916 char string[64];
1917
1918 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1919 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1920
1921 if (neg_exp)
1922 {
1923 if (one_over)
1924 {
1925 fprintf (dump_file, "1.0 / (");
1926 dump_integer_part (dump_file, "x", n);
1927 if (n > 0)
1928 fprintf (dump_file, " * ");
1929 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1930 fprintf (dump_file, ")");
1931 }
1932 else
1933 {
1934 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1935 fprintf (dump_file, " / (");
1936 dump_integer_part (dump_file, "x", n);
1937 fprintf (dump_file, ")");
1938 }
1939 }
1940 else
1941 {
1942 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1943 if (n > 0)
1944 fprintf (dump_file, " * ");
1945 dump_integer_part (dump_file, "x", n);
1946 }
1947
1948 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1949 }
1950
1951
1952 tree fract_res = NULL_TREE;
1953 cache[0] = arg0;
1954
1955 /* Calculate the fractional part of the exponent. */
1956 for (unsigned i = 0; i < synth_info.deepest; i++)
1957 {
1958 if (synth_info.factors[i])
1959 {
1960 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1961
1962 if (!fract_res)
1963 fract_res = sqrt_chain;
1964
1965 else
1966 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1967 fract_res, sqrt_chain);
1968 }
1969 }
1970
1971 tree res = NULL_TREE;
1972
1973 if (neg_exp)
1974 {
1975 if (one_over)
1976 {
1977 if (n > 0)
1978 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1979 fract_res, integer_res);
1980 else
1981 res = fract_res;
1982
1983 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1984 build_real (type, dconst1), res);
1985 }
1986 else
1987 {
1988 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1989 fract_res, integer_res);
1990 }
1991 }
1992 else
1993 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1994 fract_res, integer_res);
1995 return res;
1996 }
1997
1998 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1999 with location info LOC. If possible, create an equivalent and
2000 less expensive sequence of statements prior to GSI, and return an
2001    expression holding the result. */
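/* A few illustrative cases (the exact guards are checked in the code below,
   so treat these as a sketch rather than a guarantee):
     pow (x, 2.0)   -> x * x
     pow (x, 0.5)   -> sqrt (x)            (needs -fno-signed-zeros)
     pow (x, 0.25)  -> sqrt (sqrt (x))     (needs unsafe math and hw sqrt)
     pow (x, 1./3.) -> cbrt (x)            (needs unsafe math)  */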
2002
2003 static tree
2004 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2005 tree arg0, tree arg1)
2006 {
2007 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2008 REAL_VALUE_TYPE c2, dconst3;
2009 HOST_WIDE_INT n;
2010 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2011 machine_mode mode;
2012 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2013 bool hw_sqrt_exists, c_is_int, c2_is_int;
2014
2015 dconst1_4 = dconst1;
2016 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2017
2018 /* If the exponent isn't a constant, there's nothing of interest
2019 to be done. */
2020 if (TREE_CODE (arg1) != REAL_CST)
2021 return NULL_TREE;
2022
2023 /* Don't perform the operation if flag_signaling_nans is on
2024 and the operand is a signaling NaN. */
2025 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2026 && ((TREE_CODE (arg0) == REAL_CST
2027 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2028 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2029 return NULL_TREE;
2030
2031 /* If the exponent is equivalent to an integer, expand to an optimal
2032 multiplication sequence when profitable. */
2033 c = TREE_REAL_CST (arg1);
2034 n = real_to_integer (&c);
2035 real_from_integer (&cint, VOIDmode, n, SIGNED);
2036 c_is_int = real_identical (&c, &cint);
2037
2038 if (c_is_int
2039 && ((n >= -1 && n <= 2)
2040 || (flag_unsafe_math_optimizations
2041 && speed_p
2042 && powi_cost (n) <= POWI_MAX_MULTS)))
2043 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2044
2045 /* Attempt various optimizations using sqrt and cbrt. */
2046 type = TREE_TYPE (arg0);
2047 mode = TYPE_MODE (type);
2048 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2049
2050 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2051 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2052 sqrt(-0) = -0. */
2053 if (sqrtfn
2054 && real_equal (&c, &dconsthalf)
2055 && !HONOR_SIGNED_ZEROS (mode))
2056 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2057
2058 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2059
2060 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2061 optimizations since 1./3. is not exactly representable. If x
2062 is negative and finite, the correct value of pow(x,1./3.) is
2063 a NaN with the "invalid" exception raised, because the value
2064 of 1./3. actually has an even denominator. The correct value
2065 of cbrt(x) is a negative real value. */
2066 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2067 dconst1_3 = real_value_truncate (mode, dconst_third ());
2068
2069 if (flag_unsafe_math_optimizations
2070 && cbrtfn
2071 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2072 && real_equal (&c, &dconst1_3))
2073 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2074
2075 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2076 if we don't have a hardware sqrt insn. */
2077 dconst1_6 = dconst1_3;
2078 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2079
2080 if (flag_unsafe_math_optimizations
2081 && sqrtfn
2082 && cbrtfn
2083 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2084 && speed_p
2085 && hw_sqrt_exists
2086 && real_equal (&c, &dconst1_6))
2087 {
2088 /* sqrt(x) */
2089 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2090
2091 /* cbrt(sqrt(x)) */
2092 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2093 }
2094
2095
2096 /* Attempt to expand the POW as a product of square root chains.
2097      Expand the 0.25 case even when optimising for size. */
2098 if (flag_unsafe_math_optimizations
2099 && sqrtfn
2100 && hw_sqrt_exists
2101 && (speed_p || real_equal (&c, &dconst1_4))
2102 && !HONOR_SIGNED_ZEROS (mode))
2103 {
2104 unsigned int max_depth = speed_p
2105 ? param_max_pow_sqrt_depth
2106 : 2;
2107
2108 tree expand_with_sqrts
2109 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2110
2111 if (expand_with_sqrts)
2112 return expand_with_sqrts;
2113 }
2114
2115 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2116 n = real_to_integer (&c2);
2117 real_from_integer (&cint, VOIDmode, n, SIGNED);
2118 c2_is_int = real_identical (&c2, &cint);
2119
2120 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2121
2122 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2123 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2124
2125 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2126 different from pow(x, 1./3.) due to rounding and behavior with
2127 negative x, we need to constrain this transformation to unsafe
2128 math and positive x or finite math. */
2129 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2130 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2131 real_round (&c2, mode, &c2);
2132 n = real_to_integer (&c2);
2133 real_from_integer (&cint, VOIDmode, n, SIGNED);
2134 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2135 real_convert (&c2, mode, &c2);
2136
2137 if (flag_unsafe_math_optimizations
2138 && cbrtfn
2139 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2140 && real_identical (&c2, &c)
2141 && !c2_is_int
2142 && optimize_function_for_speed_p (cfun)
2143 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2144 {
2145 tree powi_x_ndiv3 = NULL_TREE;
2146
2147 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2148 possible or profitable, give up. Skip the degenerate case when
2149 abs(n) < 3, where the result is always 1. */
2150 if (absu_hwi (n) >= 3)
2151 {
2152 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2153 abs_hwi (n / 3));
2154 if (!powi_x_ndiv3)
2155 return NULL_TREE;
2156 }
2157
2158 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2159 as that creates an unnecessary variable. Instead, just produce
2160 either cbrt(x) or cbrt(x) * cbrt(x). */
2161 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2162
2163 if (absu_hwi (n) % 3 == 1)
2164 powi_cbrt_x = cbrt_x;
2165 else
2166 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2167 cbrt_x, cbrt_x);
2168
2169 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2170 if (absu_hwi (n) < 3)
2171 result = powi_cbrt_x;
2172 else
2173 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2174 powi_x_ndiv3, powi_cbrt_x);
2175
2176 /* If n is negative, reciprocate the result. */
2177 if (n < 0)
2178 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2179 build_real (type, dconst1), result);
2180
2181 return result;
2182 }
2183
2184 /* No optimizations succeeded. */
2185 return NULL_TREE;
2186 }
2187
2188 /* ARG is the argument to a cabs builtin call in GSI with location info
2189 LOC. Create a sequence of statements prior to GSI that calculates
2190 sqrt(R*R + I*I), where R and I are the real and imaginary components
2191 of ARG, respectively. Return an expression holding the result. */
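/* Illustrative GIMPLE sketch of the emitted sequence (SSA names made up):
     _1 = REALPART_EXPR <arg>;
     _2 = _1 * _1;
     _3 = IMAGPART_EXPR <arg>;
     _4 = _3 * _3;
     _5 = _2 + _4;
     _6 = sqrt (_5);  */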
2192
2193 static tree
2194 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2195 {
2196 tree real_part, imag_part, addend1, addend2, sum, result;
2197 tree type = TREE_TYPE (TREE_TYPE (arg));
2198 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2199 machine_mode mode = TYPE_MODE (type);
2200
2201 if (!flag_unsafe_math_optimizations
2202 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2203 || !sqrtfn
2204 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2205 return NULL_TREE;
2206
2207 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2208 REALPART_EXPR, arg);
2209 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2210 real_part, real_part);
2211 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212 IMAGPART_EXPR, arg);
2213 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214 imag_part, imag_part);
2215 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2216 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2217
2218 return result;
2219 }
2220
2221 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2222 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2223 an optimal number of multiplies, when n is a constant. */
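/* Illustrative example of the sin/cos CSE (SSA names made up):
     s_1 = sin (x_2);              cexpi_5 = __builtin_cexpi (x_2);
     c_3 = cos (x_2);      -->     s_1 = IMAGPART_EXPR <cexpi_5>;
                                   c_3 = REALPART_EXPR <cexpi_5>;  */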
2224
2225 namespace {
2226
2227 const pass_data pass_data_cse_sincos =
2228 {
2229 GIMPLE_PASS, /* type */
2230 "sincos", /* name */
2231 OPTGROUP_NONE, /* optinfo_flags */
2232 TV_TREE_SINCOS, /* tv_id */
2233 PROP_ssa, /* properties_required */
2234 PROP_gimple_opt_math, /* properties_provided */
2235 0, /* properties_destroyed */
2236 0, /* todo_flags_start */
2237 TODO_update_ssa, /* todo_flags_finish */
2238 };
2239
2240 class pass_cse_sincos : public gimple_opt_pass
2241 {
2242 public:
2243   pass_cse_sincos (gcc::context *ctxt)
2244 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2245 {}
2246
2247 /* opt_pass methods: */
2248   virtual bool gate (function *)
2249 {
2250 /* We no longer require either sincos or cexp, since powi expansion
2251 piggybacks on this pass. */
2252 return optimize;
2253 }
2254
2255 virtual unsigned int execute (function *);
2256
2257 }; // class pass_cse_sincos
2258
2259 unsigned int
2260 pass_cse_sincos::execute (function *fun)
2261 {
2262 basic_block bb;
2263 bool cfg_changed = false;
2264
2265 calculate_dominance_info (CDI_DOMINATORS);
2266 memset (&sincos_stats, 0, sizeof (sincos_stats));
2267
2268 FOR_EACH_BB_FN (bb, fun)
2269 {
2270 gimple_stmt_iterator gsi;
2271 bool cleanup_eh = false;
2272
2273 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2274 {
2275 gimple *stmt = gsi_stmt (gsi);
2276
2277 /* Only the last stmt in a bb could throw, no need to call
2278 gimple_purge_dead_eh_edges if we change something in the middle
2279 of a basic block. */
2280 cleanup_eh = false;
2281
2282 if (is_gimple_call (stmt)
2283 && gimple_call_lhs (stmt))
2284 {
2285 tree arg, arg0, arg1, result;
2286 HOST_WIDE_INT n;
2287 location_t loc;
2288
2289 switch (gimple_call_combined_fn (stmt))
2290 {
2291 CASE_CFN_COS:
2292 CASE_CFN_SIN:
2293 CASE_CFN_CEXPI:
2294 arg = gimple_call_arg (stmt, 0);
2295 /* Make sure we have either sincos or cexp. */
2296 if (!targetm.libc_has_function (function_c99_math_complex,
2297 TREE_TYPE (arg))
2298 && !targetm.libc_has_function (function_sincos,
2299 TREE_TYPE (arg)))
2300 break;
2301
2302 if (TREE_CODE (arg) == SSA_NAME)
2303 cfg_changed |= execute_cse_sincos_1 (arg);
2304 break;
2305
2306 CASE_CFN_POW:
2307 arg0 = gimple_call_arg (stmt, 0);
2308 arg1 = gimple_call_arg (stmt, 1);
2309
2310 loc = gimple_location (stmt);
2311 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2312
2313 if (result)
2314 {
2315 tree lhs = gimple_get_lhs (stmt);
2316 gassign *new_stmt = gimple_build_assign (lhs, result);
2317 gimple_set_location (new_stmt, loc);
2318 unlink_stmt_vdef (stmt);
2319 gsi_replace (&gsi, new_stmt, true);
2320 cleanup_eh = true;
2321 if (gimple_vdef (stmt))
2322 release_ssa_name (gimple_vdef (stmt));
2323 }
2324 break;
2325
2326 CASE_CFN_POWI:
2327 arg0 = gimple_call_arg (stmt, 0);
2328 arg1 = gimple_call_arg (stmt, 1);
2329 loc = gimple_location (stmt);
2330
2331 if (real_minus_onep (arg0))
2332 {
2333 tree t0, t1, cond, one, minus_one;
2334 gassign *stmt;
2335
2336 t0 = TREE_TYPE (arg0);
2337 t1 = TREE_TYPE (arg1);
2338 one = build_real (t0, dconst1);
2339 minus_one = build_real (t0, dconstm1);
2340
2341 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2342 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2343 arg1, build_int_cst (t1, 1));
2344 gimple_set_location (stmt, loc);
2345 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2346
2347 result = make_temp_ssa_name (t0, NULL, "powi");
2348 stmt = gimple_build_assign (result, COND_EXPR, cond,
2349 minus_one, one);
2350 gimple_set_location (stmt, loc);
2351 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2352 }
2353 else
2354 {
2355 if (!tree_fits_shwi_p (arg1))
2356 break;
2357
2358 n = tree_to_shwi (arg1);
2359 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2360 }
2361
2362 if (result)
2363 {
2364 tree lhs = gimple_get_lhs (stmt);
2365 gassign *new_stmt = gimple_build_assign (lhs, result);
2366 gimple_set_location (new_stmt, loc);
2367 unlink_stmt_vdef (stmt);
2368 gsi_replace (&gsi, new_stmt, true);
2369 cleanup_eh = true;
2370 if (gimple_vdef (stmt))
2371 release_ssa_name (gimple_vdef (stmt));
2372 }
2373 break;
2374
2375 CASE_CFN_CABS:
2376 arg0 = gimple_call_arg (stmt, 0);
2377 loc = gimple_location (stmt);
2378 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2379
2380 if (result)
2381 {
2382 tree lhs = gimple_get_lhs (stmt);
2383 gassign *new_stmt = gimple_build_assign (lhs, result);
2384 gimple_set_location (new_stmt, loc);
2385 unlink_stmt_vdef (stmt);
2386 gsi_replace (&gsi, new_stmt, true);
2387 cleanup_eh = true;
2388 if (gimple_vdef (stmt))
2389 release_ssa_name (gimple_vdef (stmt));
2390 }
2391 break;
2392
2393 default:;
2394 }
2395 }
2396 }
2397 if (cleanup_eh)
2398 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2399 }
2400
2401 statistics_counter_event (fun, "sincos statements inserted",
2402 sincos_stats.inserted);
2403 statistics_counter_event (fun, "conv statements removed",
2404 sincos_stats.conv_removed);
2405
2406 return cfg_changed ? TODO_cleanup_cfg : 0;
2407 }
2408
2409 } // anon namespace
2410
2411 gimple_opt_pass *
2412 make_pass_cse_sincos (gcc::context *ctxt)
2413 {
2414 return new pass_cse_sincos (ctxt);
2415 }
2416
2417 /* Return true if stmt is a type conversion operation that can be stripped
2418 when used in a widening multiply operation. */
2419 static bool
2420 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2421 {
2422 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2423
2424 if (TREE_CODE (result_type) == INTEGER_TYPE)
2425 {
2426 tree op_type;
2427 tree inner_op_type;
2428
2429 if (!CONVERT_EXPR_CODE_P (rhs_code))
2430 return false;
2431
2432 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2433
2434 /* If the type of OP has the same precision as the result, then
2435 we can strip this conversion. The multiply operation will be
2436 selected to create the correct extension as a by-product. */
2437 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2438 return true;
2439
2440 /* We can also strip a conversion if it preserves the signed-ness of
2441 the operation and doesn't narrow the range. */
2442 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2443
2444 /* If the inner-most type is unsigned, then we can strip any
2445 intermediate widening operation. If it's signed, then the
2446 intermediate widening operation must also be signed. */
2447 if ((TYPE_UNSIGNED (inner_op_type)
2448 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2449 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2450 return true;
2451
2452 return false;
2453 }
2454
2455 return rhs_code == FIXED_CONVERT_EXPR;
2456 }
2457
2458 /* Return true if RHS is a suitable operand for a widening multiplication,
2459 assuming a target type of TYPE.
2460 There are two cases:
2461
2462 - RHS makes some value at least twice as wide. Store that value
2463 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2464
2465 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2466      and set *TYPE_OUT to NULL. */
2467
2468 static bool
2469 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2470 tree *new_rhs_out)
2471 {
2472 gimple *stmt;
2473 tree type1, rhs1;
2474
2475 if (TREE_CODE (rhs) == SSA_NAME)
2476 {
2477 stmt = SSA_NAME_DEF_STMT (rhs);
2478 if (is_gimple_assign (stmt))
2479 {
2480 if (! widening_mult_conversion_strippable_p (type, stmt))
2481 rhs1 = rhs;
2482 else
2483 {
2484 rhs1 = gimple_assign_rhs1 (stmt);
2485
2486 if (TREE_CODE (rhs1) == INTEGER_CST)
2487 {
2488 *new_rhs_out = rhs1;
2489 *type_out = NULL;
2490 return true;
2491 }
2492 }
2493 }
2494 else
2495 rhs1 = rhs;
2496
2497 type1 = TREE_TYPE (rhs1);
2498
2499 if (TREE_CODE (type1) != TREE_CODE (type)
2500 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2501 return false;
2502
2503 *new_rhs_out = rhs1;
2504 *type_out = type1;
2505 return true;
2506 }
2507
2508 if (TREE_CODE (rhs) == INTEGER_CST)
2509 {
2510 *new_rhs_out = rhs;
2511 *type_out = NULL;
2512 return true;
2513 }
2514
2515 return false;
2516 }
2517
2518 /* Return true if STMT performs a widening multiplication, assuming the
2519 output type is TYPE. If so, store the unwidened types of the operands
2520 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2521 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2522 and *TYPE2_OUT would give the operands of the multiplication. */
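/* For example (illustrative, SSA names made up): for
     _1 = (long int) a_2(D);
     _3 = (long int) b_4(D);
     _5 = _1 * _3;
   called on the multiplication this returns true, with *TYPE1_OUT and
   *TYPE2_OUT set to the narrower type of a_2 and b_4 and *RHS1_OUT and
   *RHS2_OUT set to a_2(D) and b_4(D).  */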
2523
2524 static bool
2525 is_widening_mult_p (gimple *stmt,
2526 tree *type1_out, tree *rhs1_out,
2527 tree *type2_out, tree *rhs2_out)
2528 {
2529 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2530
2531 if (TREE_CODE (type) == INTEGER_TYPE)
2532 {
2533 if (TYPE_OVERFLOW_TRAPS (type))
2534 return false;
2535 }
2536 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2537 return false;
2538
2539 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2540 rhs1_out))
2541 return false;
2542
2543 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2544 rhs2_out))
2545 return false;
2546
2547 if (*type1_out == NULL)
2548 {
2549 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2550 return false;
2551 *type1_out = *type2_out;
2552 }
2553
2554 if (*type2_out == NULL)
2555 {
2556 if (!int_fits_type_p (*rhs2_out, *type1_out))
2557 return false;
2558 *type2_out = *type1_out;
2559 }
2560
2561 /* Ensure that the larger of the two operands comes first. */
2562 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2563 {
2564 std::swap (*type1_out, *type2_out);
2565 std::swap (*rhs1_out, *rhs2_out);
2566 }
2567
2568 return true;
2569 }
2570
2571 /* Check to see if the CALL statement is an invocation of copysign
2572 with 1. being the first argument. */
2573 static bool
2574 is_copysign_call_with_1 (gimple *call)
2575 {
2576 gcall *c = dyn_cast <gcall *> (call);
2577 if (! c)
2578 return false;
2579
2580 enum combined_fn code = gimple_call_combined_fn (c);
2581
2582 if (code == CFN_LAST)
2583 return false;
2584
2585 if (builtin_fn_p (code))
2586 {
2587 switch (as_builtin_fn (code))
2588 {
2589 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2590 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2591 return real_onep (gimple_call_arg (c, 0));
2592 default:
2593 return false;
2594 }
2595 }
2596
2597 if (internal_fn_p (code))
2598 {
2599 switch (as_internal_fn (code))
2600 {
2601 case IFN_COPYSIGN:
2602 return real_onep (gimple_call_arg (c, 0));
2603 default:
2604 return false;
2605 }
2606 }
2607
2608 return false;
2609 }
2610
2611 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2612    This only happens when the xorsign optab is defined; if the
2613    pattern is not an xorsign pattern or if expansion fails, FALSE is
2614    returned, otherwise TRUE is returned. */
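/* Illustrative example (SSA names made up):
     _1 = __builtin_copysign (1.0, y_2(D));
     z_3 = x_4(D) * _1;
   becomes
     z_3 = .XORSIGN (x_4(D), y_2(D));  */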
2615 static bool
2616 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2617 {
2618 tree treeop0, treeop1, lhs, type;
2619 location_t loc = gimple_location (stmt);
2620 lhs = gimple_assign_lhs (stmt);
2621 treeop0 = gimple_assign_rhs1 (stmt);
2622 treeop1 = gimple_assign_rhs2 (stmt);
2623 type = TREE_TYPE (lhs);
2624 machine_mode mode = TYPE_MODE (type);
2625
2626 if (HONOR_SNANS (type))
2627 return false;
2628
2629 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2630 {
2631 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2632 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2633 {
2634 call0 = SSA_NAME_DEF_STMT (treeop1);
2635 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2636 return false;
2637
2638 treeop1 = treeop0;
2639 }
2640 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2641 return false;
2642
2643 gcall *c = as_a<gcall*> (call0);
2644 treeop0 = gimple_call_arg (c, 1);
2645
2646 gcall *call_stmt
2647 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2648 gimple_set_lhs (call_stmt, lhs);
2649 gimple_set_location (call_stmt, loc);
2650 gsi_replace (gsi, call_stmt, true);
2651 return true;
2652 }
2653
2654 return false;
2655 }
2656
2657 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2658 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2659 value is true iff we converted the statement. */
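/* Illustrative example (SSA names made up): on a target with a widening
   multiply instruction,
     _1 = (long int) a_2(D);
     _3 = (long int) b_4(D);
     c_5 = _1 * _3;
   has its multiplication rewritten to
     c_5 = a_2(D) w* b_4(D);   (WIDEN_MULT_EXPR)  */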
2660
2661 static bool
2662 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2663 {
2664 tree lhs, rhs1, rhs2, type, type1, type2;
2665 enum insn_code handler;
2666 scalar_int_mode to_mode, from_mode, actual_mode;
2667 optab op;
2668 int actual_precision;
2669 location_t loc = gimple_location (stmt);
2670 bool from_unsigned1, from_unsigned2;
2671
2672 lhs = gimple_assign_lhs (stmt);
2673 type = TREE_TYPE (lhs);
2674 if (TREE_CODE (type) != INTEGER_TYPE)
2675 return false;
2676
2677 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2678 return false;
2679
2680 to_mode = SCALAR_INT_TYPE_MODE (type);
2681 from_mode = SCALAR_INT_TYPE_MODE (type1);
2682 if (to_mode == from_mode)
2683 return false;
2684
2685 from_unsigned1 = TYPE_UNSIGNED (type1);
2686 from_unsigned2 = TYPE_UNSIGNED (type2);
2687
2688 if (from_unsigned1 && from_unsigned2)
2689 op = umul_widen_optab;
2690 else if (!from_unsigned1 && !from_unsigned2)
2691 op = smul_widen_optab;
2692 else
2693 op = usmul_widen_optab;
2694
2695 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2696 &actual_mode);
2697
2698 if (handler == CODE_FOR_nothing)
2699 {
2700 if (op != smul_widen_optab)
2701 {
2702 /* We can use a signed multiply with unsigned types as long as
2703 there is a wider mode to use, or it is the smaller of the two
2704 types that is unsigned. Note that type1 >= type2, always. */
2705 if ((TYPE_UNSIGNED (type1)
2706 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2707 || (TYPE_UNSIGNED (type2)
2708 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2709 {
2710 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2711 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2712 return false;
2713 }
2714
2715 op = smul_widen_optab;
2716 handler = find_widening_optab_handler_and_mode (op, to_mode,
2717 from_mode,
2718 &actual_mode);
2719
2720 if (handler == CODE_FOR_nothing)
2721 return false;
2722
2723 from_unsigned1 = from_unsigned2 = false;
2724 }
2725 else
2726 {
2727 /* Expand can synthesize smul_widen_optab if the target
2728 supports umul_widen_optab. */
2729 op = umul_widen_optab;
2730 handler = find_widening_optab_handler_and_mode (op, to_mode,
2731 from_mode,
2732 &actual_mode);
2733 if (handler == CODE_FOR_nothing)
2734 return false;
2735 }
2736 }
2737
2738   /* Ensure that the inputs to the handler are in the correct precision
2739 for the opcode. This will be the full mode size. */
2740 actual_precision = GET_MODE_PRECISION (actual_mode);
2741 if (2 * actual_precision > TYPE_PRECISION (type))
2742 return false;
2743 if (actual_precision != TYPE_PRECISION (type1)
2744 || from_unsigned1 != TYPE_UNSIGNED (type1))
2745 rhs1 = build_and_insert_cast (gsi, loc,
2746 build_nonstandard_integer_type
2747 (actual_precision, from_unsigned1), rhs1);
2748 if (actual_precision != TYPE_PRECISION (type2)
2749 || from_unsigned2 != TYPE_UNSIGNED (type2))
2750 rhs2 = build_and_insert_cast (gsi, loc,
2751 build_nonstandard_integer_type
2752 (actual_precision, from_unsigned2), rhs2);
2753
2754 /* Handle constants. */
2755 if (TREE_CODE (rhs1) == INTEGER_CST)
2756 rhs1 = fold_convert (type1, rhs1);
2757 if (TREE_CODE (rhs2) == INTEGER_CST)
2758 rhs2 = fold_convert (type2, rhs2);
2759
2760 gimple_assign_set_rhs1 (stmt, rhs1);
2761 gimple_assign_set_rhs2 (stmt, rhs2);
2762 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2763 update_stmt (stmt);
2764 widen_mul_stats.widen_mults_inserted++;
2765 return true;
2766 }
2767
2768 /* Process a single gimple statement STMT, which is found at the
2769 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2770 rhs (given by CODE), and try to convert it into a
2771 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2772 is true iff we converted the statement. */
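/* Illustrative example (SSA names made up):
     _1 = a_2(D) w* b_3(D);
     c_4 = _1 + d_5(D);
   becomes
     c_4 = WIDEN_MULT_PLUS_EXPR <a_2(D), b_3(D), d_5(D)>;  */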
2773
2774 static bool
2775 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2776 enum tree_code code)
2777 {
2778 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2779 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2780 tree type, type1, type2, optype;
2781 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2782 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2783 optab this_optab;
2784 enum tree_code wmult_code;
2785 enum insn_code handler;
2786 scalar_mode to_mode, from_mode, actual_mode;
2787 location_t loc = gimple_location (stmt);
2788 int actual_precision;
2789 bool from_unsigned1, from_unsigned2;
2790
2791 lhs = gimple_assign_lhs (stmt);
2792 type = TREE_TYPE (lhs);
2793 if (TREE_CODE (type) != INTEGER_TYPE
2794 && TREE_CODE (type) != FIXED_POINT_TYPE)
2795 return false;
2796
2797 if (code == MINUS_EXPR)
2798 wmult_code = WIDEN_MULT_MINUS_EXPR;
2799 else
2800 wmult_code = WIDEN_MULT_PLUS_EXPR;
2801
2802 rhs1 = gimple_assign_rhs1 (stmt);
2803 rhs2 = gimple_assign_rhs2 (stmt);
2804
2805 if (TREE_CODE (rhs1) == SSA_NAME)
2806 {
2807 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2808 if (is_gimple_assign (rhs1_stmt))
2809 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2810 }
2811
2812 if (TREE_CODE (rhs2) == SSA_NAME)
2813 {
2814 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2815 if (is_gimple_assign (rhs2_stmt))
2816 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2817 }
2818
2819 /* Allow for one conversion statement between the multiply
2820      and addition/subtraction statement.  If there is more than
2821      one conversion then we assume they would invalidate this
2822 transformation. If that's not the case then they should have
2823 been folded before now. */
2824 if (CONVERT_EXPR_CODE_P (rhs1_code))
2825 {
2826 conv1_stmt = rhs1_stmt;
2827 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2828 if (TREE_CODE (rhs1) == SSA_NAME)
2829 {
2830 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2831 if (is_gimple_assign (rhs1_stmt))
2832 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2833 }
2834 else
2835 return false;
2836 }
2837 if (CONVERT_EXPR_CODE_P (rhs2_code))
2838 {
2839 conv2_stmt = rhs2_stmt;
2840 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2841 if (TREE_CODE (rhs2) == SSA_NAME)
2842 {
2843 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2844 if (is_gimple_assign (rhs2_stmt))
2845 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2846 }
2847 else
2848 return false;
2849 }
2850
2851 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2852      is_widening_mult_p, but we still need the operands it returns.
2853
2854 It might also appear that it would be sufficient to use the existing
2855 operands of the widening multiply, but that would limit the choice of
2856 multiply-and-accumulate instructions.
2857
2858      If the widened-multiplication result has more than one use, it is
2859      probably wiser not to do the conversion.  Also restrict this operation
2860      to a single basic block to avoid moving the multiply to a different block
2861 with a higher execution frequency. */
2862 if (code == PLUS_EXPR
2863 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2864 {
2865 if (!has_single_use (rhs1)
2866 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2867 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2868 &type2, &mult_rhs2))
2869 return false;
2870 add_rhs = rhs2;
2871 conv_stmt = conv1_stmt;
2872 }
2873 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2874 {
2875 if (!has_single_use (rhs2)
2876 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2877 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2878 &type2, &mult_rhs2))
2879 return false;
2880 add_rhs = rhs1;
2881 conv_stmt = conv2_stmt;
2882 }
2883 else
2884 return false;
2885
2886 to_mode = SCALAR_TYPE_MODE (type);
2887 from_mode = SCALAR_TYPE_MODE (type1);
2888 if (to_mode == from_mode)
2889 return false;
2890
2891 from_unsigned1 = TYPE_UNSIGNED (type1);
2892 from_unsigned2 = TYPE_UNSIGNED (type2);
2893 optype = type1;
2894
2895 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2896 if (from_unsigned1 != from_unsigned2)
2897 {
2898 if (!INTEGRAL_TYPE_P (type))
2899 return false;
2900 /* We can use a signed multiply with unsigned types as long as
2901 there is a wider mode to use, or it is the smaller of the two
2902 types that is unsigned. Note that type1 >= type2, always. */
2903 if ((from_unsigned1
2904 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2905 || (from_unsigned2
2906 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2907 {
2908 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2909 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2910 return false;
2911 }
2912
2913 from_unsigned1 = from_unsigned2 = false;
2914 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2915 false);
2916 }
2917
2918 /* If there was a conversion between the multiply and addition
2919 then we need to make sure it fits a multiply-and-accumulate.
2920      There should be a single mode change which does not change the
2921 value. */
2922 if (conv_stmt)
2923 {
2924 /* We use the original, unmodified data types for this. */
2925 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2926 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2927 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2928 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2929
2930 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2931 {
2932 /* Conversion is a truncate. */
2933 if (TYPE_PRECISION (to_type) < data_size)
2934 return false;
2935 }
2936 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2937 {
2938 /* Conversion is an extend. Check it's the right sort. */
2939 if (TYPE_UNSIGNED (from_type) != is_unsigned
2940 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2941 return false;
2942 }
2943 /* else convert is a no-op for our purposes. */
2944 }
2945
2946 /* Verify that the machine can perform a widening multiply
2947 accumulate in this mode/signedness combination, otherwise
2948 this transformation is likely to pessimize code. */
2949 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2950 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2951 from_mode, &actual_mode);
2952
2953 if (handler == CODE_FOR_nothing)
2954 return false;
2955
2956   /* Ensure that the inputs to the handler are in the correct precision
2957 for the opcode. This will be the full mode size. */
2958 actual_precision = GET_MODE_PRECISION (actual_mode);
2959 if (actual_precision != TYPE_PRECISION (type1)
2960 || from_unsigned1 != TYPE_UNSIGNED (type1))
2961 mult_rhs1 = build_and_insert_cast (gsi, loc,
2962 build_nonstandard_integer_type
2963 (actual_precision, from_unsigned1),
2964 mult_rhs1);
2965 if (actual_precision != TYPE_PRECISION (type2)
2966 || from_unsigned2 != TYPE_UNSIGNED (type2))
2967 mult_rhs2 = build_and_insert_cast (gsi, loc,
2968 build_nonstandard_integer_type
2969 (actual_precision, from_unsigned2),
2970 mult_rhs2);
2971
2972 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2973 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2974
2975 /* Handle constants. */
2976 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2977 mult_rhs1 = fold_convert (type1, mult_rhs1);
2978 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2979 mult_rhs2 = fold_convert (type2, mult_rhs2);
2980
2981 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2982 add_rhs);
2983 update_stmt (gsi_stmt (*gsi));
2984 widen_mul_stats.maccs_inserted++;
2985 return true;
2986 }
2987
2988 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2989 OP2 and which we know is used in statements that can be, together with the
2990 multiplication, converted to FMAs, perform the transformation. */
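/* Illustrative example (SSA names made up):
     tmp_1 = a_2(D) * b_3(D);
     x_4 = tmp_1 + c_5(D);
   becomes
     x_4 = .FMA (a_2(D), b_3(D), c_5(D));
   while subtractions and negated uses are folded further into .FMS, .FNMA
   or .FNMS where the target supports them.  */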
2991
2992 static void
2993 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2994 {
2995 tree type = TREE_TYPE (mul_result);
2996 gimple *use_stmt;
2997 imm_use_iterator imm_iter;
2998 gcall *fma_stmt;
2999
3000 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3001 {
3002 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3003 tree addop, mulop1 = op1, result = mul_result;
3004 bool negate_p = false;
3005 gimple_seq seq = NULL;
3006
3007 if (is_gimple_debug (use_stmt))
3008 continue;
3009
3010 if (is_gimple_assign (use_stmt)
3011 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3012 {
3013 result = gimple_assign_lhs (use_stmt);
3014 use_operand_p use_p;
3015 gimple *neguse_stmt;
3016 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3017 gsi_remove (&gsi, true);
3018 release_defs (use_stmt);
3019
3020 use_stmt = neguse_stmt;
3021 gsi = gsi_for_stmt (use_stmt);
3022 negate_p = true;
3023 }
3024
3025 tree cond, else_value, ops[3];
3026 tree_code code;
3027 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3028 ops, &else_value))
3029 gcc_unreachable ();
3030 addop = ops[0] == result ? ops[1] : ops[0];
3031
3032 if (code == MINUS_EXPR)
3033 {
3034 if (ops[0] == result)
3035 /* a * b - c -> a * b + (-c) */
3036 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3037 else
3038 /* a - b * c -> (-b) * c + a */
3039 negate_p = !negate_p;
3040 }
3041
3042 if (negate_p)
3043 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3044
3045 if (seq)
3046 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3047
3048 if (cond)
3049 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3050 op2, addop, else_value);
3051 else
3052 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3053 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3054 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3055 use_stmt));
3056 gsi_replace (&gsi, fma_stmt, true);
3057 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3058 regardless of where the negation occurs. */
3059 gimple *orig_stmt = gsi_stmt (gsi);
3060 if (fold_stmt (&gsi, follow_all_ssa_edges))
3061 {
3062 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3063 gcc_unreachable ();
3064 update_stmt (gsi_stmt (gsi));
3065 }
3066
3067 if (dump_file && (dump_flags & TDF_DETAILS))
3068 {
3069 fprintf (dump_file, "Generated FMA ");
3070 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3071 fprintf (dump_file, "\n");
3072 }
3073
3074 /* If the FMA result is negated in a single use, fold the negation
3075 too. */
3076 orig_stmt = gsi_stmt (gsi);
3077 use_operand_p use_p;
3078 gimple *neg_stmt;
3079 if (is_gimple_call (orig_stmt)
3080 && gimple_call_internal_p (orig_stmt)
3081 && gimple_call_lhs (orig_stmt)
3082 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3083 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3084 && is_gimple_assign (neg_stmt)
3085 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3086 && !stmt_could_throw_p (cfun, neg_stmt))
3087 {
3088 gsi = gsi_for_stmt (neg_stmt);
3089 if (fold_stmt (&gsi, follow_all_ssa_edges))
3090 {
3091 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3092 gcc_unreachable ();
3093 update_stmt (gsi_stmt (gsi));
3094 if (dump_file && (dump_flags & TDF_DETAILS))
3095 {
3096 fprintf (dump_file, "Folded FMA negation ");
3097 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3098 fprintf (dump_file, "\n");
3099 }
3100 }
3101 }
3102
3103 widen_mul_stats.fmas_inserted++;
3104 }
3105 }
3106
3107 /* Data necessary to perform the actual transformation from a multiplication
3108    and an addition to an FMA once the decision to do so has been taken, and
3109    to then delete the multiplication statement from the function IL. */
3110
3111 struct fma_transformation_info
3112 {
3113 gimple *mul_stmt;
3114 tree mul_result;
3115 tree op1;
3116 tree op2;
3117 };
3118
3119 /* Structure containing the current state of FMA deferring, i.e. whether we are
3120 deferring, whether to continue deferring, and all data necessary to come
3121 back and perform all deferred transformations. */
3122
3123 class fma_deferring_state
3124 {
3125 public:
3126 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3127 do any deferring. */
3128
3129   fma_deferring_state (bool perform_deferring)
3130 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3131 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3132
3133   /* List of FMA candidates for which the transformation has been determined
3134      to be possible, but which at this point in the BB analysis we do not yet
3135      consider beneficial. */
3136 auto_vec<fma_transformation_info, 8> m_candidates;
3137
3138 /* Set of results of multiplication that are part of an already deferred FMA
3139 candidates. */
3140 hash_set<tree> m_mul_result_set;
3141
3142   /* The PHI that supposedly feeds back the result of one FMA to another over
3143      a loop boundary. */
3144 gphi *m_initial_phi;
3145
3146 /* Result of the last produced FMA candidate or NULL if there has not been
3147 one. */
3148 tree m_last_result;
3149
3150 /* If true, deferring might still be profitable. If false, transform all
3151 candidates and no longer defer. */
3152 bool m_deferring_p;
3153 };
3154
3155 /* Transform all deferred FMA candidates and mark STATE as no longer
3156 deferring. */
3157
3158 static void
3159 cancel_fma_deferring (fma_deferring_state *state)
3160 {
3161 if (!state->m_deferring_p)
3162 return;
3163
3164 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3165 {
3166 if (dump_file && (dump_flags & TDF_DETAILS))
3167 fprintf (dump_file, "Generating deferred FMA\n");
3168
3169 const fma_transformation_info &fti = state->m_candidates[i];
3170 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3171
3172 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3173 gsi_remove (&gsi, true);
3174 release_defs (fti.mul_stmt);
3175 }
3176 state->m_deferring_p = false;
3177 }
3178
3179 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3180 Otherwise return NULL. */
3181
3182 static gphi *
3183 result_of_phi (tree op)
3184 {
3185 if (TREE_CODE (op) != SSA_NAME)
3186 return NULL;
3187
3188 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3189 }
3190
3191 /* After processing statements of a BB and recording STATE, return true if the
3192    initial phi is fed by the last FMA candidate result or one such result from
3193 previously processed BBs marked in LAST_RESULT_SET. */
3194
3195 static bool
3196 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3197 hash_set<tree> *last_result_set)
3198 {
3199 ssa_op_iter iter;
3200 use_operand_p use;
3201 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3202 {
3203 tree t = USE_FROM_PTR (use);
3204 if (t == state->m_last_result
3205 || last_result_set->contains (t))
3206 return true;
3207 }
3208
3209 return false;
3210 }
3211
3212 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3213 with uses in additions and subtractions to form fused multiply-add
3214 operations. Returns true if successful and MUL_STMT should be removed.
3215 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3216 on MUL_COND, otherwise it is unconditional.
3217
3218 If STATE indicates that we are deferring FMA transformation, that means
3219 that we do not produce FMAs for basic blocks which look like:
3220
3221 <bb 6>
3222 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3223 _65 = _14 * _16;
3224 accumulator_66 = _65 + accumulator_111;
3225
3226 or its unrolled version, i.e. with several FMA candidates that feed result
3227 of one into the addend of another. Instead, we add them to a list in STATE
3228 and if we later discover an FMA candidate that is not part of such a chain,
3229 we go back and perform all deferred past candidates. */
3230
3231 static bool
3232 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3233 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3234 {
3235 tree mul_result = gimple_get_lhs (mul_stmt);
3236 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3237 if the statement was left just for the side-effects. */
3238 if (!mul_result)
3239 return false;
3240 tree type = TREE_TYPE (mul_result);
3241 gimple *use_stmt, *neguse_stmt;
3242 use_operand_p use_p;
3243 imm_use_iterator imm_iter;
3244
3245 if (FLOAT_TYPE_P (type)
3246 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3247 return false;
3248
3249 /* We don't want to do bitfield reduction ops. */
3250 if (INTEGRAL_TYPE_P (type)
3251 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3252 return false;
3253
3254 /* If the target doesn't support it, don't generate it. We assume that
3255 if fma isn't available then fms, fnma or fnms are not either. */
3256 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3257 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3258 return false;
3259
3260 /* If the multiplication has zero uses, it is kept around probably because
3261 of -fnon-call-exceptions. Don't optimize it away in that case,
3262      that is DCE's job. */
3263 if (has_zero_uses (mul_result))
3264 return false;
3265
3266 bool check_defer
3267 = (state->m_deferring_p
3268 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3269 param_avoid_fma_max_bits));
3270 bool defer = check_defer;
3271 bool seen_negate_p = false;
3272 /* Make sure that the multiplication statement becomes dead after
3273 the transformation, thus that all uses are transformed to FMAs.
3274 This means we assume that an FMA operation has the same cost
3275 as an addition. */
3276   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3277 {
3278 tree result = mul_result;
3279 bool negate_p = false;
3280
3281 use_stmt = USE_STMT (use_p);
3282
3283 if (is_gimple_debug (use_stmt))
3284 continue;
3285
3286       /* For now, restrict this operation to a single basic block.  In theory
3287 we would want to support sinking the multiplication in
3288 m = a*b;
3289 if ()
3290 ma = m + c;
3291 else
3292 d = m;
3293 to form a fma in the then block and sink the multiplication to the
3294 else block. */
3295 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3296 return false;
3297
3298 /* A negate on the multiplication leads to FNMA. */
3299 if (is_gimple_assign (use_stmt)
3300 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3301 {
3302 ssa_op_iter iter;
3303 use_operand_p usep;
3304
3305 /* If (due to earlier missed optimizations) we have two
3306 negates of the same value, treat them as equivalent
3307 to a single negate with multiple uses. */
3308 if (seen_negate_p)
3309 return false;
3310
3311 result = gimple_assign_lhs (use_stmt);
3312
3313 /* Make sure the negate statement becomes dead with this
3314 single transformation. */
3315 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3316 &use_p, &neguse_stmt))
3317 return false;
3318
3319 /* Make sure the multiplication isn't also used on that stmt. */
3320 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3321 if (USE_FROM_PTR (usep) == mul_result)
3322 return false;
3323
3324 /* Re-validate. */
3325 use_stmt = neguse_stmt;
3326 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3327 return false;
3328
3329 negate_p = seen_negate_p = true;
3330 }
3331
3332 tree cond, else_value, ops[3];
3333 tree_code code;
3334 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3335 &else_value))
3336 return false;
3337
3338 switch (code)
3339 {
3340 case MINUS_EXPR:
3341 if (ops[1] == result)
3342 negate_p = !negate_p;
3343 break;
3344 case PLUS_EXPR:
3345 break;
3346 default:
3347 /* FMA can only be formed from PLUS and MINUS. */
3348 return false;
3349 }
3350
3351 if (mul_cond && cond != mul_cond)
3352 return false;
3353
3354 if (cond)
3355 {
3356 if (cond == result || else_value == result)
3357 return false;
3358 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3359 return false;
3360 }
3361
3362 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3363 we'll visit later, we might be able to get a more profitable
3364 match with fnma.
3365	 OTOH, if we don't, a negate / fma pair likely has lower latency
3366	 than a mult / subtract pair. */
3367 if (code == MINUS_EXPR
3368 && !negate_p
3369 && ops[0] == result
3370 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3371 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3372 && TREE_CODE (ops[1]) == SSA_NAME
3373 && has_single_use (ops[1]))
3374 {
3375 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3376 if (is_gimple_assign (stmt2)
3377 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3378 return false;
3379 }
3380
3381 /* We can't handle a * b + a * b. */
3382 if (ops[0] == ops[1])
3383 return false;
3384 /* If deferring, make sure we are not looking at an instruction that
3385 wouldn't have existed if we were not. */
3386 if (state->m_deferring_p
3387 && (state->m_mul_result_set.contains (ops[0])
3388 || state->m_mul_result_set.contains (ops[1])))
3389 return false;
3390
3391 if (check_defer)
3392 {
3393 tree use_lhs = gimple_get_lhs (use_stmt);
3394 if (state->m_last_result)
3395 {
3396 if (ops[1] == state->m_last_result
3397 || ops[0] == state->m_last_result)
3398 defer = true;
3399 else
3400 defer = false;
3401 }
3402 else
3403 {
3404 gcc_checking_assert (!state->m_initial_phi);
3405 gphi *phi;
3406 if (ops[0] == result)
3407 phi = result_of_phi (ops[1]);
3408 else
3409 {
3410 gcc_assert (ops[1] == result);
3411 phi = result_of_phi (ops[0]);
3412 }
3413
3414 if (phi)
3415 {
3416 state->m_initial_phi = phi;
3417 defer = true;
3418 }
3419 else
3420 defer = false;
3421 }
3422
3423 state->m_last_result = use_lhs;
3424 check_defer = false;
3425 }
3426 else
3427 defer = false;
3428
3429 /* While it is possible to validate whether or not the exact form that
3430 we've recognized is available in the backend, the assumption is that
3431 if the deferring logic above did not trigger, the transformation is
3432 never a loss. For instance, suppose the target only has the plain FMA
3433 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3434 MUL+SUB for FMA+NEG, which is still two operations. Consider
3435 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3436 form the two NEGs are independent and could be run in parallel. */
3437 }
3438
3439 if (defer)
3440 {
3441 fma_transformation_info fti;
3442 fti.mul_stmt = mul_stmt;
3443 fti.mul_result = mul_result;
3444 fti.op1 = op1;
3445 fti.op2 = op2;
3446 state->m_candidates.safe_push (fti);
3447 state->m_mul_result_set.add (mul_result);
3448
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3450 {
3451 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3452 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3453 fprintf (dump_file, "\n");
3454 }
3455
3456 return false;
3457 }
3458 else
3459 {
3460 if (state->m_deferring_p)
3461 cancel_fma_deferring (state);
3462 convert_mult_to_fma_1 (mul_result, op1, op2);
3463 return true;
3464 }
3465 }
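/* As a rough source-level illustration (an editor's sketch with made-up
   names, not a GCC testcase), convert_mult_to_fma targets multiplications
   whose single-use result feeds an addition or subtraction:

     double madd (double a, double b, double c)
     {
       return a * b + c;   // may become fma (a, b, c) where profitable
     }

   and, when deferring is enabled via param_avoid_fma_max_bits, loop
   accumulations such as

     double dot (const double *x, const double *y, int n)
     {
       double acc = 0.0;
       for (int i = 0; i < n; i++)
         acc += x[i] * y[i];   // candidate FMA chain feeding a loop PHI
       return acc;
     }

   where the decision whether to form the FMA chain is postponed until it is
   known how the chain relates to the accumulator PHI.  */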
3466
3467
3468 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3469 a check for non-zero like:
3470 _1 = x_4(D) * y_5(D);
3471 *res_7(D) = _1;
3472 if (x_4(D) != 0)
3473 goto <bb 3>; [50.00%]
3474 else
3475 goto <bb 4>; [50.00%]
3476
3477 <bb 3> [local count: 536870913]:
3478 _2 = _1 / x_4(D);
3479 _9 = _2 != y_5(D);
3480 _10 = (int) _9;
3481
3482 <bb 4> [local count: 1073741824]:
3483 # iftmp.0_3 = PHI <_10(3), 0(2)>
3484 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3485 optimize the x_4(D) != 0 condition to 1. */
3486
3487 static void
3488 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3489 gimple *div_stmt, bool *cfg_changed)
3490 {
3491 basic_block bb = gimple_bb (cond_stmt);
3492 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3493 return;
3494 edge pred_edge = single_pred_edge (bb);
3495 basic_block pred_bb = pred_edge->src;
3496 if (EDGE_COUNT (pred_bb->succs) != 2)
3497 return;
3498 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3499 edge other_succ_edge = NULL;
3500 if (gimple_code (cond_stmt) == GIMPLE_COND)
3501 {
3502 if (EDGE_COUNT (bb->succs) != 2)
3503 return;
3504 other_succ_edge = EDGE_SUCC (bb, 0);
3505 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3506 {
3507 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3508 other_succ_edge = EDGE_SUCC (bb, 1);
3509 }
3510 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3511 other_succ_edge = EDGE_SUCC (bb, 0);
3512 if (other_edge->dest != other_succ_edge->dest)
3513 return;
3514 }
3515 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3516 return;
3517 gimple *zero_cond = last_stmt (pred_bb);
3518 if (zero_cond == NULL
3519 || gimple_code (zero_cond) != GIMPLE_COND
3520 || (gimple_cond_code (zero_cond)
3521 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3522 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3523 return;
3524 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3525 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3526 return;
3527 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3528 {
3529       /* Allow the divisor to be the result of a same-precision cast
3530 	 from zero_cond_lhs. */
3531 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3532 if (TREE_CODE (rhs2) != SSA_NAME)
3533 return;
3534 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3535 if (!gimple_assign_cast_p (g)
3536 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3537 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3538 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3539 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3540 return;
3541 }
3542 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3543 mul_stmts.quick_push (div_stmt);
3544 if (is_gimple_debug (gsi_stmt (gsi)))
3545 gsi_next_nondebug (&gsi);
3546 unsigned cast_count = 0;
3547 while (gsi_stmt (gsi) != cond_stmt)
3548 {
3549       /* If the original mul_stmt has a single use, allow it in the same bb;
3550 	 we are then looking just at __builtin_mul_overflow_p.
3551 	 In that case, though, the original mul_stmt will be replaced
3552 	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3553 gimple *mul_stmt;
3554 unsigned int i;
3555 bool ok = false;
3556 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3557 {
3558 if (gsi_stmt (gsi) == mul_stmt)
3559 {
3560 ok = true;
3561 break;
3562 }
3563 }
3564 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3565 ok = true;
3566 if (!ok)
3567 return;
3568 gsi_next_nondebug (&gsi);
3569 }
3570 if (gimple_code (cond_stmt) == GIMPLE_COND)
3571 {
3572 basic_block succ_bb = other_edge->dest;
3573 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3574 gsi_next (&gpi))
3575 {
3576 gphi *phi = gpi.phi ();
3577 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3578 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3579 if (!operand_equal_p (v1, v2, 0))
3580 return;
3581 }
3582 }
3583 else
3584 {
3585 tree lhs = gimple_assign_lhs (cond_stmt);
3586 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3587 return;
3588 gsi_next_nondebug (&gsi);
3589 if (!gsi_end_p (gsi))
3590 {
3591 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3592 return;
3593 gimple *cast_stmt = gsi_stmt (gsi);
3594 if (!gimple_assign_cast_p (cast_stmt))
3595 return;
3596 tree new_lhs = gimple_assign_lhs (cast_stmt);
3597 gsi_next_nondebug (&gsi);
3598 if (!gsi_end_p (gsi)
3599 || !new_lhs
3600 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3601 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3602 return;
3603 lhs = new_lhs;
3604 }
3605 edge succ_edge = single_succ_edge (bb);
3606 basic_block succ_bb = succ_edge->dest;
3607 gsi = gsi_start_phis (succ_bb);
3608 if (gsi_end_p (gsi))
3609 return;
3610 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3611 gsi_next (&gsi);
3612 if (!gsi_end_p (gsi))
3613 return;
3614 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3615 return;
3616 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3617 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3618 {
3619 tree cond = gimple_assign_rhs1 (cond_stmt);
3620 if (TREE_CODE (cond) == NE_EXPR)
3621 {
3622 if (!operand_equal_p (other_val,
3623 gimple_assign_rhs3 (cond_stmt), 0))
3624 return;
3625 }
3626 else if (!operand_equal_p (other_val,
3627 gimple_assign_rhs2 (cond_stmt), 0))
3628 return;
3629 }
3630 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3631 {
3632 if (!integer_zerop (other_val))
3633 return;
3634 }
3635 else if (!integer_onep (other_val))
3636 return;
3637 }
3638 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3639 if (pred_edge->flags & EDGE_TRUE_VALUE)
3640 gimple_cond_make_true (zero_gcond);
3641 else
3642 gimple_cond_make_false (zero_gcond);
3643 update_stmt (zero_cond);
3644 *cfg_changed = true;
3645 }
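/* A minimal source-level sketch of the guarded check handled above
   (illustrative names only):

     int mul_overflows (unsigned x, unsigned y, unsigned *res)
     {
       *res = x * y;
       return x != 0 && *res / x != y;   // division guarded by x != 0
     }

   Once the multiplication, division and comparison have been rewritten in
   terms of .MUL_OVERFLOW, the guarding x != 0 test is redundant, so the
   function above folds the corresponding GIMPLE_COND to a constant and
   reports that the CFG changed.  */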
3646
3647 /* Helper function for arith_overflow_check_p. Return true
3648 if VAL1 is equal to VAL2 cast to corresponding integral type
3649 with other signedness or vice versa. */
3650
3651 static bool
3652 arith_cast_equal_p (tree val1, tree val2)
3653 {
3654 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3655 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3656 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3657 return false;
3658 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3659 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3660 return true;
3661 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3662 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3663 return true;
3664 return false;
3665 }
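/* For instance, given

     unsigned int u = ...;
     int s = (int) u;

   once u and s are SSA names, arith_cast_equal_p (s, u) and
   arith_cast_equal_p (u, s) both return true, because one of them is
   defined as a cast of the other; two INTEGER_CSTs are compared by value
   instead.  */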
3666
3667 /* Helper function of match_arith_overflow. Return 1
3668 if USE_STMT is unsigned overflow check ovf != 0 for
3669 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3670 and 0 otherwise. */
3671
3672 static int
3673 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3674 tree maxval, tree *other)
3675 {
3676 enum tree_code ccode = ERROR_MARK;
3677 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3678 enum tree_code code = gimple_assign_rhs_code (stmt);
3679 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3680 tree rhs1 = gimple_assign_rhs1 (stmt);
3681 tree rhs2 = gimple_assign_rhs2 (stmt);
3682 tree multop = NULL_TREE, divlhs = NULL_TREE;
3683 gimple *cur_use_stmt = use_stmt;
3684
3685 if (code == MULT_EXPR)
3686 {
3687 if (!is_gimple_assign (use_stmt))
3688 return 0;
3689 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3690 return 0;
3691 if (gimple_assign_rhs1 (use_stmt) != lhs)
3692 return 0;
3693 if (cast_stmt)
3694 {
3695 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3696 multop = rhs2;
3697 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3698 multop = rhs1;
3699 else
3700 return 0;
3701 }
3702 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3703 multop = rhs2;
3704 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3705 multop = rhs1;
3706 else
3707 return 0;
3708 if (stmt_ends_bb_p (use_stmt))
3709 return 0;
3710 divlhs = gimple_assign_lhs (use_stmt);
3711 if (!divlhs)
3712 return 0;
3713 use_operand_p use;
3714 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3715 return 0;
3716 }
3717 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3718 {
3719 ccode = gimple_cond_code (cur_use_stmt);
3720 crhs1 = gimple_cond_lhs (cur_use_stmt);
3721 crhs2 = gimple_cond_rhs (cur_use_stmt);
3722 }
3723 else if (is_gimple_assign (cur_use_stmt))
3724 {
3725 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3726 {
3727 ccode = gimple_assign_rhs_code (cur_use_stmt);
3728 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3729 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3730 }
3731 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3732 {
3733 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3734 if (COMPARISON_CLASS_P (cond))
3735 {
3736 ccode = TREE_CODE (cond);
3737 crhs1 = TREE_OPERAND (cond, 0);
3738 crhs2 = TREE_OPERAND (cond, 1);
3739 }
3740 else
3741 return 0;
3742 }
3743 else
3744 return 0;
3745 }
3746 else
3747 return 0;
3748
3749 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3750 return 0;
3751
3752 switch (ccode)
3753 {
3754 case GT_EXPR:
3755 case LE_EXPR:
3756 if (maxval)
3757 {
3758 /* r = a + b; r > maxval or r <= maxval */
3759 if (crhs1 == lhs
3760 && TREE_CODE (crhs2) == INTEGER_CST
3761 && tree_int_cst_equal (crhs2, maxval))
3762 return ccode == GT_EXPR ? 1 : -1;
3763 break;
3764 }
3765 /* r = a - b; r > a or r <= a
3766 r = a + b; a > r or a <= r or b > r or b <= r. */
3767 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3768 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3769 && crhs2 == lhs))
3770 return ccode == GT_EXPR ? 1 : -1;
3771 /* r = ~a; b > r or b <= r. */
3772 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3773 {
3774 if (other)
3775 *other = crhs1;
3776 return ccode == GT_EXPR ? 1 : -1;
3777 }
3778 break;
3779 case LT_EXPR:
3780 case GE_EXPR:
3781 if (maxval)
3782 break;
3783 /* r = a - b; a < r or a >= r
3784 r = a + b; r < a or r >= a or r < b or r >= b. */
3785 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3786 || (code == PLUS_EXPR && crhs1 == lhs
3787 && (crhs2 == rhs1 || crhs2 == rhs2)))
3788 return ccode == LT_EXPR ? 1 : -1;
3789 /* r = ~a; r < b or r >= b. */
3790 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3791 {
3792 if (other)
3793 *other = crhs2;
3794 return ccode == LT_EXPR ? 1 : -1;
3795 }
3796 break;
3797 case EQ_EXPR:
3798 case NE_EXPR:
3799 /* r = a * b; _1 = r / a; _1 == b
3800 r = a * b; _1 = r / b; _1 == a
3801 r = a * b; _1 = r / a; _1 != b
3802 r = a * b; _1 = r / b; _1 != a. */
3803 if (code == MULT_EXPR)
3804 {
3805 if (cast_stmt)
3806 {
3807 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3808 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3809 {
3810 use_stmt = cur_use_stmt;
3811 return ccode == NE_EXPR ? 1 : -1;
3812 }
3813 }
3814 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3815 || (crhs2 == divlhs && crhs1 == multop))
3816 {
3817 use_stmt = cur_use_stmt;
3818 return ccode == NE_EXPR ? 1 : -1;
3819 }
3820 }
3821 break;
3822 default:
3823 break;
3824 }
3825 return 0;
3826 }
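/* Some source-level idioms that map onto the checks classified above
   (an illustrative, non-exhaustive sketch):

     unsigned r1 = a - b;  if (r1 > a)       ...  // subtraction wrapped around
     unsigned r2 = a + b;  if (r2 < a)       ...  // addition wrapped around
     unsigned r3 = ~a;     if (b > r3)       ...  // a + b would wrap around
     unsigned r4 = a * b;  if (r4 / a != b)  ...  // multiplication wrapped around

   For each of these the function returns 1 (or -1 for the negated test), so
   that the caller can redirect the comparison to the IMAGPART_EXPR of the
   matching .ADD_OVERFLOW, .SUB_OVERFLOW or .MUL_OVERFLOW result.  */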
3827
3828 /* Recognize for unsigned x
3829 x = y - z;
3830 if (x > y)
3831 where there are other uses of x and replace it with
3832 _7 = .SUB_OVERFLOW (y, z);
3833 x = REALPART_EXPR <_7>;
3834 _8 = IMAGPART_EXPR <_7>;
3835 if (_8)
3836 and similarly for addition.
3837
3838 Also recognize:
3839 yc = (type) y;
3840 zc = (type) z;
3841 x = yc + zc;
3842 if (x > max)
3843 where y and z have unsigned types with maximum max
3844 and there are other uses of x and all of those cast x
3845 back to that unsigned type and again replace it with
3846 _7 = .ADD_OVERFLOW (y, z);
3847 _9 = REALPART_EXPR <_7>;
3848 _8 = IMAGPART_EXPR <_7>;
3849 if (_8)
3850 and replace (utype) x with _9.
3851
3852 Also recognize:
3853 x = ~z;
3854 if (y > x)
3855 and replace it with
3856 _7 = .ADD_OVERFLOW (y, z);
3857 _8 = IMAGPART_EXPR <_7>;
3858 if (_8)
3859
3860 And also recognize:
3861 z = x * y;
3862 if (x != 0)
3863 goto <bb 3>; [50.00%]
3864 else
3865 goto <bb 4>; [50.00%]
3866
3867 <bb 3> [local count: 536870913]:
3868 _2 = z / x;
3869 _9 = _2 != y;
3870 _10 = (int) _9;
3871
3872 <bb 4> [local count: 1073741824]:
3873 # iftmp.0_3 = PHI <_10(3), 0(2)>
3874 and replace it with
3875 _7 = .MUL_OVERFLOW (x, y);
3876    z = REALPART_EXPR <_7>;
3877 _8 = IMAGPART_EXPR <_7>;
3878 _9 = _8 != 0;
3879 iftmp.0_3 = (int) _9; */
3880
3881 static bool
3882 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3883 enum tree_code code, bool *cfg_changed)
3884 {
3885 tree lhs = gimple_assign_lhs (stmt);
3886 tree type = TREE_TYPE (lhs);
3887 use_operand_p use_p;
3888 imm_use_iterator iter;
3889 bool use_seen = false;
3890 bool ovf_use_seen = false;
3891 gimple *use_stmt;
3892 gimple *add_stmt = NULL;
3893 bool add_first = false;
3894 gimple *cond_stmt = NULL;
3895 gimple *cast_stmt = NULL;
3896 tree cast_lhs = NULL_TREE;
3897
3898 gcc_checking_assert (code == PLUS_EXPR
3899 || code == MINUS_EXPR
3900 || code == MULT_EXPR
3901 || code == BIT_NOT_EXPR);
3902 if (!INTEGRAL_TYPE_P (type)
3903 || !TYPE_UNSIGNED (type)
3904 || has_zero_uses (lhs)
3905 || (code != PLUS_EXPR
3906 && code != MULT_EXPR
3907 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3908 TYPE_MODE (type)) == CODE_FOR_nothing))
3909 return false;
3910
3911 tree rhs1 = gimple_assign_rhs1 (stmt);
3912 tree rhs2 = gimple_assign_rhs2 (stmt);
3913 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3914 {
3915 use_stmt = USE_STMT (use_p);
3916 if (is_gimple_debug (use_stmt))
3917 continue;
3918
3919 tree other = NULL_TREE;
3920 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3921 {
3922 if (code == BIT_NOT_EXPR)
3923 {
3924 gcc_assert (other);
3925 if (TREE_CODE (other) != SSA_NAME)
3926 return false;
3927 if (rhs2 == NULL)
3928 rhs2 = other;
3929 else
3930 return false;
3931 cond_stmt = use_stmt;
3932 }
3933 ovf_use_seen = true;
3934 }
3935 else
3936 {
3937 use_seen = true;
3938 if (code == MULT_EXPR
3939 && cast_stmt == NULL
3940 && gimple_assign_cast_p (use_stmt))
3941 {
3942 cast_lhs = gimple_assign_lhs (use_stmt);
3943 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3944 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3945 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3946 == TYPE_PRECISION (TREE_TYPE (lhs))))
3947 cast_stmt = use_stmt;
3948 else
3949 cast_lhs = NULL_TREE;
3950 }
3951 }
3952 if (ovf_use_seen && use_seen)
3953 break;
3954 }
3955
3956 if (!ovf_use_seen
3957 && code == MULT_EXPR
3958 && cast_stmt)
3959 {
3960 if (TREE_CODE (rhs1) != SSA_NAME
3961 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3962 return false;
3963 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3964 {
3965 use_stmt = USE_STMT (use_p);
3966 if (is_gimple_debug (use_stmt))
3967 continue;
3968
3969 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3970 NULL_TREE, NULL))
3971 ovf_use_seen = true;
3972 }
3973 }
3974 else
3975 {
3976 cast_stmt = NULL;
3977 cast_lhs = NULL_TREE;
3978 }
3979
3980 tree maxval = NULL_TREE;
3981 if (!ovf_use_seen
3982 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3983 || (code == PLUS_EXPR
3984 && optab_handler (uaddv4_optab,
3985 TYPE_MODE (type)) == CODE_FOR_nothing)
3986 || (code == MULT_EXPR
3987 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3988 TYPE_MODE (type)) == CODE_FOR_nothing))
3989 {
3990 if (code != PLUS_EXPR)
3991 return false;
3992 if (TREE_CODE (rhs1) != SSA_NAME
3993 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3994 return false;
3995 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
3996 tree type1 = TREE_TYPE (rhs1);
3997 if (!INTEGRAL_TYPE_P (type1)
3998 || !TYPE_UNSIGNED (type1)
3999 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4000 || (TYPE_PRECISION (type1)
4001 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4002 return false;
4003 if (TREE_CODE (rhs2) == INTEGER_CST)
4004 {
4005 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4006 TYPE_PRECISION (type1),
4007 UNSIGNED), 0))
4008 return false;
4009 rhs2 = fold_convert (type1, rhs2);
4010 }
4011 else
4012 {
4013 if (TREE_CODE (rhs2) != SSA_NAME
4014 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4015 return false;
4016 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4017 tree type2 = TREE_TYPE (rhs2);
4018 if (!INTEGRAL_TYPE_P (type2)
4019 || !TYPE_UNSIGNED (type2)
4020 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4021 || (TYPE_PRECISION (type2)
4022 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4023 return false;
4024 }
4025 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4026 type = type1;
4027 else
4028 type = TREE_TYPE (rhs2);
4029
4030 if (TREE_CODE (type) != INTEGER_TYPE
4031 || optab_handler (uaddv4_optab,
4032 TYPE_MODE (type)) == CODE_FOR_nothing)
4033 return false;
4034
4035 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4036 UNSIGNED));
4037 ovf_use_seen = false;
4038 use_seen = false;
4039 basic_block use_bb = NULL;
4040 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4041 {
4042 use_stmt = USE_STMT (use_p);
4043 if (is_gimple_debug (use_stmt))
4044 continue;
4045
4046 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4047 {
4048 ovf_use_seen = true;
4049 use_bb = gimple_bb (use_stmt);
4050 }
4051 else
4052 {
4053 if (!gimple_assign_cast_p (use_stmt)
4054 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4055 return false;
4056 tree use_lhs = gimple_assign_lhs (use_stmt);
4057 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4058 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4059 > TYPE_PRECISION (type)))
4060 return false;
4061 use_seen = true;
4062 }
4063 }
4064 if (!ovf_use_seen)
4065 return false;
4066 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4067 {
4068 if (!use_seen)
4069 return false;
4070 tree new_rhs1 = make_ssa_name (type);
4071 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4072 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4073 rhs1 = new_rhs1;
4074 }
4075 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4076 {
4077 if (!use_seen)
4078 return false;
4079 tree new_rhs2 = make_ssa_name (type);
4080 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4081 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4082 rhs2 = new_rhs2;
4083 }
4084 else if (!use_seen)
4085 {
4086 /* If there are no uses of the wider addition, check if
4087 forwprop has not created a narrower addition.
4088 Require it to be in the same bb as the overflow check. */
4089 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4090 {
4091 use_stmt = USE_STMT (use_p);
4092 if (is_gimple_debug (use_stmt))
4093 continue;
4094
4095 if (use_stmt == stmt)
4096 continue;
4097
4098 if (!is_gimple_assign (use_stmt)
4099 || gimple_bb (use_stmt) != use_bb
4100 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4101 continue;
4102
4103 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4104 {
4105 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4106 rhs2, 0))
4107 continue;
4108 }
4109 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4110 {
4111 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4112 continue;
4113 }
4114 else
4115 continue;
4116
4117 add_stmt = use_stmt;
4118 break;
4119 }
4120 if (add_stmt == NULL)
4121 return false;
4122
4123 /* If stmt and add_stmt are in the same bb, we need to find out
4124 which one is earlier. If they are in different bbs, we've
4125 checked add_stmt is in the same bb as one of the uses of the
4126 stmt lhs, so stmt needs to dominate add_stmt too. */
4127 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4128 {
4129 gimple_stmt_iterator gsif = *gsi;
4130 gimple_stmt_iterator gsib = *gsi;
4131 int i;
4132 /* Search both forward and backward from stmt and have a small
4133 upper bound. */
4134 for (i = 0; i < 128; i++)
4135 {
4136 if (!gsi_end_p (gsib))
4137 {
4138 gsi_prev_nondebug (&gsib);
4139 if (gsi_stmt (gsib) == add_stmt)
4140 {
4141 add_first = true;
4142 break;
4143 }
4144 }
4145 else if (gsi_end_p (gsif))
4146 break;
4147 if (!gsi_end_p (gsif))
4148 {
4149 gsi_next_nondebug (&gsif);
4150 if (gsi_stmt (gsif) == add_stmt)
4151 break;
4152 }
4153 }
4154 if (i == 128)
4155 return false;
4156 if (add_first)
4157 *gsi = gsi_for_stmt (add_stmt);
4158 }
4159 }
4160 }
4161
4162 if (code == BIT_NOT_EXPR)
4163 *gsi = gsi_for_stmt (cond_stmt);
4164
4165 auto_vec<gimple *, 8> mul_stmts;
4166 if (code == MULT_EXPR && cast_stmt)
4167 {
4168 type = TREE_TYPE (cast_lhs);
4169 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4170 if (gimple_assign_cast_p (g)
4171 && useless_type_conversion_p (type,
4172 TREE_TYPE (gimple_assign_rhs1 (g)))
4173 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4174 rhs1 = gimple_assign_rhs1 (g);
4175 else
4176 {
4177 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4178 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4179 rhs1 = gimple_assign_lhs (g);
4180 mul_stmts.quick_push (g);
4181 }
4182 if (TREE_CODE (rhs2) == INTEGER_CST)
4183 rhs2 = fold_convert (type, rhs2);
4184 else
4185 {
4186 g = SSA_NAME_DEF_STMT (rhs2);
4187 if (gimple_assign_cast_p (g)
4188 && useless_type_conversion_p (type,
4189 TREE_TYPE (gimple_assign_rhs1 (g)))
4190 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4191 rhs2 = gimple_assign_rhs1 (g);
4192 else
4193 {
4194 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4195 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4196 rhs2 = gimple_assign_lhs (g);
4197 mul_stmts.quick_push (g);
4198 }
4199 }
4200 }
4201 tree ctype = build_complex_type (type);
4202 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4203 ? IFN_MUL_OVERFLOW
4204 : code != MINUS_EXPR
4205 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4206 2, rhs1, rhs2);
4207 tree ctmp = make_ssa_name (ctype);
4208 gimple_call_set_lhs (g, ctmp);
4209 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4210 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4211 gassign *g2;
4212 if (code != BIT_NOT_EXPR)
4213 {
4214 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4215 build1 (REALPART_EXPR, type, ctmp));
4216 if (maxval || cast_stmt)
4217 {
4218 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4219 if (add_first)
4220 *gsi = gsi_for_stmt (stmt);
4221 }
4222 else
4223 gsi_replace (gsi, g2, true);
4224 if (code == MULT_EXPR)
4225 {
4226 mul_stmts.quick_push (g);
4227 mul_stmts.quick_push (g2);
4228 if (cast_stmt)
4229 {
4230 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4231 gsi_replace (gsi, g2, true);
4232 mul_stmts.quick_push (g2);
4233 }
4234 }
4235 }
4236 tree ovf = make_ssa_name (type);
4237 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4238 build1 (IMAGPART_EXPR, type, ctmp));
4239 if (code != BIT_NOT_EXPR)
4240 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4241 else
4242 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4243 if (code == MULT_EXPR)
4244 mul_stmts.quick_push (g2);
4245
4246 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4247 {
4248 if (is_gimple_debug (use_stmt))
4249 continue;
4250
4251 gimple *orig_use_stmt = use_stmt;
4252 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4253 maxval, NULL);
4254 if (ovf_use == 0)
4255 {
4256 gcc_assert (code != BIT_NOT_EXPR);
4257 if (maxval)
4258 {
4259 tree use_lhs = gimple_assign_lhs (use_stmt);
4260 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4261 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4262 TREE_TYPE (new_lhs)))
4263 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4264 update_stmt (use_stmt);
4265 }
4266 continue;
4267 }
4268 if (gimple_code (use_stmt) == GIMPLE_COND)
4269 {
4270 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4271 gimple_cond_set_lhs (cond_stmt, ovf);
4272 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4273 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4274 }
4275 else
4276 {
4277 gcc_checking_assert (is_gimple_assign (use_stmt));
4278 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4279 {
4280 gimple_assign_set_rhs1 (use_stmt, ovf);
4281 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4282 gimple_assign_set_rhs_code (use_stmt,
4283 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4284 }
4285 else
4286 {
4287 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4288 == COND_EXPR);
4289 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4290 boolean_type_node, ovf,
4291 build_int_cst (type, 0));
4292 gimple_assign_set_rhs1 (use_stmt, cond);
4293 }
4294 }
4295 update_stmt (use_stmt);
4296 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4297 {
4298 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4299 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4300 cfg_changed);
4301 gsi_remove (&gsi2, true);
4302 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4303 }
4304 }
4305 if (maxval)
4306 {
4307 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4308 gsi_remove (&gsi2, true);
4309 if (add_stmt)
4310 {
4311 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4312 new_lhs);
4313 gsi2 = gsi_for_stmt (add_stmt);
4314 gsi_replace (&gsi2, g, true);
4315 }
4316 }
4317 else if (code == BIT_NOT_EXPR)
4318 {
4319 *gsi = gsi_for_stmt (stmt);
4320 gsi_remove (gsi, true);
4321 release_ssa_name (lhs);
4322 return true;
4323 }
4324 return false;
4325 }
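/* At the source level the transformation corresponds to rewriting, e.g.,

     unsigned sum = a + b;
     if (sum < a)              // overflow check
       return handle_overflow ();
     return use (sum);

   into the equivalent of

     unsigned sum;
     if (__builtin_add_overflow (a, b, &sum))
       return handle_overflow ();
     return use (sum);

   i.e. a single .ADD_OVERFLOW call whose REALPART_EXPR is the sum and whose
   IMAGPART_EXPR is the overflow flag, assuming the target provides the
   corresponding [u]addv4/[u]subv4/[u]mulv4 pattern.  (Sketch with made-up
   helper names; see the comment before the function for the exact GIMPLE
   forms recognized.)  */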
4326
4327 /* Return true if target has support for divmod. */
4328
4329 static bool
4330 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4331 {
4332 /* If target supports hardware divmod insn, use it for divmod. */
4333 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4334 return true;
4335
4336 /* Check if libfunc for divmod is available. */
4337 rtx libfunc = optab_libfunc (divmod_optab, mode);
4338 if (libfunc != NULL_RTX)
4339 {
4340       /* If optab_handler exists for div_optab, perhaps in a wider mode,
4341 	 we don't want to use the libfunc even if it exists for the given mode. */
4342 machine_mode div_mode;
4343 FOR_EACH_MODE_FROM (div_mode, mode)
4344 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4345 return false;
4346
4347 return targetm.expand_divmod_libfunc != NULL;
4348 }
4349
4350 return false;
4351 }
4352
4353 /* Check if stmt is candidate for divmod transform. */
4354
4355 static bool
4356 divmod_candidate_p (gassign *stmt)
4357 {
4358 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4359 machine_mode mode = TYPE_MODE (type);
4360 optab divmod_optab, div_optab;
4361
4362 if (TYPE_UNSIGNED (type))
4363 {
4364 divmod_optab = udivmod_optab;
4365 div_optab = udiv_optab;
4366 }
4367 else
4368 {
4369 divmod_optab = sdivmod_optab;
4370 div_optab = sdiv_optab;
4371 }
4372
4373 tree op1 = gimple_assign_rhs1 (stmt);
4374 tree op2 = gimple_assign_rhs2 (stmt);
4375
4376 /* Disable the transform if either is a constant, since division-by-constant
4377 may have specialized expansion. */
4378 if (CONSTANT_CLASS_P (op1))
4379 return false;
4380
4381 if (CONSTANT_CLASS_P (op2))
4382 {
4383 if (integer_pow2p (op2))
4384 return false;
4385
4386 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4387 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4388 return false;
4389
4390       /* If the divisor is not a power of 2 and the precision is wider than
4391 	 a HWI, expand_divmod punts on that, so in that case it is better
4392 	 to use the divmod optab or libfunc. Similarly if choose_multiplier
4393 	 might need pre/post shifts of BITS_PER_WORD or more. */
4394 }
4395
4396 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4397 expand using the [su]divv optabs. */
4398 if (TYPE_OVERFLOW_TRAPS (type))
4399 return false;
4400
4401 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4402 return false;
4403
4404 return true;
4405 }
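/* For example, with a variable divisor both of

     unsigned q = a / b;
     unsigned r = a % b;

   are divmod candidates, whereas  a / 16  or  a % 7  (for a word-sized type)
   are rejected above because division by a constant usually has a cheaper
   specialized expansion such as shifts or multiplication by a magic
   constant.  */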
4406
4407 /* This function looks for:
4408 t1 = a TRUNC_DIV_EXPR b;
4409 t2 = a TRUNC_MOD_EXPR b;
4410 and transforms it to the following sequence:
4411 complex_tmp = DIVMOD (a, b);
4412     t1 = REALPART_EXPR(complex_tmp);
4413     t2 = IMAGPART_EXPR(complex_tmp);
4414 For conditions enabling the transform see divmod_candidate_p().
4415
4416 The pass has three parts:
4417 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4418 other trunc_div_expr and trunc_mod_expr stmts.
4419 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4420 to stmts vector.
4421 3) Insert DIVMOD call just before top_stmt and update entries in
4422     stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
4423 IMAGPART_EXPR for mod). */
4424
4425 static bool
4426 convert_to_divmod (gassign *stmt)
4427 {
4428 if (stmt_can_throw_internal (cfun, stmt)
4429 || !divmod_candidate_p (stmt))
4430 return false;
4431
4432 tree op1 = gimple_assign_rhs1 (stmt);
4433 tree op2 = gimple_assign_rhs2 (stmt);
4434
4435 imm_use_iterator use_iter;
4436 gimple *use_stmt;
4437 auto_vec<gimple *> stmts;
4438
4439 gimple *top_stmt = stmt;
4440 basic_block top_bb = gimple_bb (stmt);
4441
4442   /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4443      at least stmt and possibly other trunc_div/trunc_mod stmts
4444      having the same operands as stmt. */
4445
4446 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4447 {
4448 if (is_gimple_assign (use_stmt)
4449 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4450 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4451 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4452 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4453 {
4454 if (stmt_can_throw_internal (cfun, use_stmt))
4455 continue;
4456
4457 basic_block bb = gimple_bb (use_stmt);
4458
4459 if (bb == top_bb)
4460 {
4461 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4462 top_stmt = use_stmt;
4463 }
4464 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4465 {
4466 top_bb = bb;
4467 top_stmt = use_stmt;
4468 }
4469 }
4470 }
4471
4472 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4473 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4474
4475 stmts.safe_push (top_stmt);
4476 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4477
4478   /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4479      to the stmts vector. The 2nd loop will always add stmt to the stmts vector,
4480      since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4481      2nd loop ends up adding at least a single trunc_mod_expr stmt. */
4482
4483 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4484 {
4485 if (is_gimple_assign (use_stmt)
4486 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4487 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4488 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4489 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4490 {
4491 if (use_stmt == top_stmt
4492 || stmt_can_throw_internal (cfun, use_stmt)
4493 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4494 continue;
4495
4496 stmts.safe_push (use_stmt);
4497 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4498 div_seen = true;
4499 }
4500 }
4501
4502 if (!div_seen)
4503 return false;
4504
4505 /* Part 3: Create libcall to internal fn DIVMOD:
4506 divmod_tmp = DIVMOD (op1, op2). */
4507
4508 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4509 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4510 call_stmt, "divmod_tmp");
4511 gimple_call_set_lhs (call_stmt, res);
4512 /* We rejected throwing statements above. */
4513 gimple_call_set_nothrow (call_stmt, true);
4514
4515 /* Insert the call before top_stmt. */
4516 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4517 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4518
4519 widen_mul_stats.divmod_calls_inserted++;
4520
4521 /* Update all statements in stmts vector:
4522 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4523 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4524
4525 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4526 {
4527 tree new_rhs;
4528
4529 switch (gimple_assign_rhs_code (use_stmt))
4530 {
4531 case TRUNC_DIV_EXPR:
4532 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4533 break;
4534
4535 case TRUNC_MOD_EXPR:
4536 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4537 break;
4538
4539 default:
4540 gcc_unreachable ();
4541 }
4542
4543 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4544 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4545 update_stmt (use_stmt);
4546 }
4547
4548 return true;
4549 }
4550
4551 /* Find integer multiplications where the operands are extended from
4552 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4553 where appropriate. */
4554
4555 namespace {
4556
4557 const pass_data pass_data_optimize_widening_mul =
4558 {
4559 GIMPLE_PASS, /* type */
4560 "widening_mul", /* name */
4561 OPTGROUP_NONE, /* optinfo_flags */
4562 TV_TREE_WIDEN_MUL, /* tv_id */
4563 PROP_ssa, /* properties_required */
4564 0, /* properties_provided */
4565 0, /* properties_destroyed */
4566 0, /* todo_flags_start */
4567 TODO_update_ssa, /* todo_flags_finish */
4568 };
4569
4570 class pass_optimize_widening_mul : public gimple_opt_pass
4571 {
4572 public:
4573   pass_optimize_widening_mul (gcc::context *ctxt)
4574 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4575 {}
4576
4577 /* opt_pass methods: */
4578   virtual bool gate (function *)
4579 {
4580 return flag_expensive_optimizations && optimize;
4581 }
4582
4583 virtual unsigned int execute (function *);
4584
4585 }; // class pass_optimize_widening_mul
4586
4587 /* Walker class to perform the transformation in reverse dominance order. */
4588
4589 class math_opts_dom_walker : public dom_walker
4590 {
4591 public:
4592 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4593      if walking modifies the CFG. */
4594
4595   math_opts_dom_walker (bool *cfg_changed_p)
4596 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4597 m_cfg_changed_p (cfg_changed_p) {}
4598
4599 /* The actual actions performed in the walk. */
4600
4601 virtual void after_dom_children (basic_block);
4602
4603 /* Set of results of chains of multiply and add statement combinations that
4604 were not transformed into FMAs because of active deferring. */
4605 hash_set<tree> m_last_result_set;
4606
4607 /* Pointer to a flag of the user that needs to be set if CFG has been
4608 modified. */
4609 bool *m_cfg_changed_p;
4610 };
4611
4612 void
4613 math_opts_dom_walker::after_dom_children (basic_block bb)
4614 {
4615 gimple_stmt_iterator gsi;
4616
4617 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4618
4619 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4620 {
4621 gimple *stmt = gsi_stmt (gsi);
4622 enum tree_code code;
4623
4624 if (is_gimple_assign (stmt))
4625 {
4626 code = gimple_assign_rhs_code (stmt);
4627 switch (code)
4628 {
4629 case MULT_EXPR:
4630 if (!convert_mult_to_widen (stmt, &gsi)
4631 && !convert_expand_mult_copysign (stmt, &gsi)
4632 && convert_mult_to_fma (stmt,
4633 gimple_assign_rhs1 (stmt),
4634 gimple_assign_rhs2 (stmt),
4635 &fma_state))
4636 {
4637 gsi_remove (&gsi, true);
4638 release_defs (stmt);
4639 continue;
4640 }
4641 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4642 break;
4643
4644 case PLUS_EXPR:
4645 case MINUS_EXPR:
4646 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4647 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4648 break;
4649
4650 case BIT_NOT_EXPR:
4651 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4652 continue;
4653 break;
4654
4655 case TRUNC_MOD_EXPR:
4656 convert_to_divmod (as_a<gassign *> (stmt));
4657 break;
4658
4659 default:;
4660 }
4661 }
4662 else if (is_gimple_call (stmt))
4663 {
4664 switch (gimple_call_combined_fn (stmt))
4665 {
4666 CASE_CFN_POW:
4667 if (gimple_call_lhs (stmt)
4668 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4669 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4670 &dconst2)
4671 && convert_mult_to_fma (stmt,
4672 gimple_call_arg (stmt, 0),
4673 gimple_call_arg (stmt, 0),
4674 &fma_state))
4675 {
4676 unlink_stmt_vdef (stmt);
4677 if (gsi_remove (&gsi, true)
4678 && gimple_purge_dead_eh_edges (bb))
4679 *m_cfg_changed_p = true;
4680 release_defs (stmt);
4681 continue;
4682 }
4683 break;
4684
4685 case CFN_COND_MUL:
4686 if (convert_mult_to_fma (stmt,
4687 gimple_call_arg (stmt, 1),
4688 gimple_call_arg (stmt, 2),
4689 &fma_state,
4690 gimple_call_arg (stmt, 0)))
4691
4692 {
4693 gsi_remove (&gsi, true);
4694 release_defs (stmt);
4695 continue;
4696 }
4697 break;
4698
4699 case CFN_LAST:
4700 cancel_fma_deferring (&fma_state);
4701 break;
4702
4703 default:
4704 break;
4705 }
4706 }
4707 gsi_next (&gsi);
4708 }
4709 if (fma_state.m_deferring_p
4710 && fma_state.m_initial_phi)
4711 {
4712 gcc_checking_assert (fma_state.m_last_result);
4713 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
4714 &m_last_result_set))
4715 cancel_fma_deferring (&fma_state);
4716 else
4717 m_last_result_set.add (fma_state.m_last_result);
4718 }
4719 }
4720
4721
4722 unsigned int
4723 pass_optimize_widening_mul::execute (function *fun)
4724 {
4725 bool cfg_changed = false;
4726
4727 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4728 calculate_dominance_info (CDI_DOMINATORS);
4729 renumber_gimple_stmt_uids (cfun);
4730
4731 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4732
4733 statistics_counter_event (fun, "widening multiplications inserted",
4734 widen_mul_stats.widen_mults_inserted);
4735 statistics_counter_event (fun, "widening maccs inserted",
4736 widen_mul_stats.maccs_inserted);
4737 statistics_counter_event (fun, "fused multiply-adds inserted",
4738 widen_mul_stats.fmas_inserted);
4739 statistics_counter_event (fun, "divmod calls inserted",
4740 widen_mul_stats.divmod_calls_inserted);
4741
4742 return cfg_changed ? TODO_cleanup_cfg : 0;
4743 }
4744
4745 } // anon namespace
4746
4747 gimple_opt_pass *
4748 make_pass_optimize_widening_mul (gcc::context *ctxt)
4749 {
4750 return new pass_optimize_widening_mul (ctxt);
4751 }
4752
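/* One way to observe this pass in action is to compile with optimization
   enabled and request its dump file, e.g. (assuming the usual GCC dump
   naming conventions):

     gcc -O2 -fdump-tree-widening_mul-details test.c

   which produces a test.c.*.widening_mul dump describing the widening
   multiplications, FMAs, overflow internal calls and DIVMOD calls that
   were introduced.  */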