1 /* Conditional Dead Call Elimination pass for the GNU compiler.
2    Copyright (C) 2008-2019 Free Software Foundation, Inc.
3    Contributed by Xinliang David Li <davidxl@google.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "gimple-pretty-print.h"
31 #include "fold-const.h"
32 #include "stor-layout.h"
33 #include "gimple-iterator.h"
34 #include "tree-cfg.h"
35 #include "tree-into-ssa.h"
36 #include "builtins.h"
37 #include "internal-fn.h"
38 #include "tree-dfa.h"
39 
40 
41 /* This pass serves two closely-related purposes:
42 
43    1. It conditionally executes calls that set errno if (a) the result of
44       the call is unused and (b) a simple range check on the arguments can
45       detect most cases where errno does not need to be set.
46 
47       This is the "conditional dead-code elimination" that gave the pass
48       its original name, since the call is dead for most argument values.
49       The calls for which it helps are usually part of the C++ abstraction
50       penalty exposed after inlining.
51 
52    2. It looks for calls to built-in functions that set errno and whose
53       result is used.  It checks whether there is an associated internal
54       function that doesn't set errno and whether the target supports
55       that internal function.  If so, the pass uses the internal function
56       to compute the result of the built-in function but still arranges
57       for errno to be set when necessary.  There are two ways of setting
58       errno:
59 
60       a. by protecting the original call with the same argument checks as (1)
61 
62       b. by protecting the original call with a check that the result
63 	 of the internal function is not equal to itself (i.e. is NaN).
64 
      (b) requires that NaNs are the only erroneous results.  It is not
      appropriate for functions like log, which sets errno to ERANGE for
      a zero argument.  (b) is also likely to perform worse than (a)
      because it requires the result to be calculated first.  The pass
      therefore uses (a) when it can and uses (b) as a fallback.
70 
71       For (b) the pass can replace the original call with a call to
72       IFN_SET_EDOM, if the target supports direct assignments to errno.
73 
74    In both cases, arguments that require errno to be set should occur
75    rarely in practice.  Checks of the errno result should also be rare,
76    but the compiler would need powerful interprocedural analysis to
77    prove that errno is not checked.  It's much easier to add argument
78    checks or result checks instead.
79 
80      An example of (1) is:
81 
82 	 log (x);   // Mostly dead call
83      ==>
84 	 if (__builtin_islessequal (x, 0))
85 	     log (x);
86 
     With this change, the call to log (x) is effectively eliminated,
     because in the vast majority of cases x is within the valid
     domain and the guard skips the call.  The branch is highly
     predictable, so its cost is low.
91 
92      An example of (2) is:
93 
94 	y = sqrt (x);
95      ==>
96 	y = IFN_SQRT (x);
97 	if (__builtin_isless (x, 0))
98 	    sqrt (x);
99 
100      In the vast majority of cases we should then never need to call sqrt.
101 
102    Note that library functions are not supposed to clear errno to zero without
103    error.  See IEEE Std 1003.1, section 2.3 Error Numbers, and section 7.5:3 of
104    ISO/IEC 9899 (C99).
105 
106    The condition wrapping the builtin call is conservatively set to avoid too
107    aggressive (wrong) shrink wrapping.  */
108 
109 
/* Describes the input domain of a function argument as a single
   range whose bounds are integers.  A side that is unbounded (-inf
   for the lower side, +inf for the upper side) has its has_* flag set
   to false.  The is_*_inclusive flags tell whether the corresponding
   bound value itself belongs to the domain.  */

struct inp_domain
{
  /* Lower and upper bound values.  */
  int lb, ub;
  /* Whether a lower / upper bound exists at all.  */
  bool has_lb, has_ub;
  /* Whether the lower / upper bound value is part of the domain.  */
  bool is_lb_inclusive, is_ub_inclusive;
};
127 
128 /* A helper function to construct and return an input
129    domain object.  LB is the lower bound, HAS_LB is
130    a boolean flag indicating if the lower bound exists,
131    and LB_INCLUSIVE is a boolean flag indicating if the
132    lower bound is inclusive or not.  UB, HAS_UB, and
133    UB_INCLUSIVE have the same meaning, but for upper
134    bound of the domain.  */
135 
136 static inp_domain
get_domain(int lb,bool has_lb,bool lb_inclusive,int ub,bool has_ub,bool ub_inclusive)137 get_domain (int lb, bool has_lb, bool lb_inclusive,
138             int ub, bool has_ub, bool ub_inclusive)
139 {
140   inp_domain domain;
141   domain.lb = lb;
142   domain.has_lb = has_lb;
143   domain.is_lb_inclusive = lb_inclusive;
144   domain.ub = ub;
145   domain.has_ub = has_ub;
146   domain.is_ub_inclusive = ub_inclusive;
147   return domain;
148 }
149 
150 /* A helper function to check the target format for the
151    argument type. In this implementation, only IEEE formats
152    are supported.  ARG is the call argument to be checked.
153    Returns true if the format is supported.  To support other
154    target formats,  function get_no_error_domain needs to be
155    enhanced to have range bounds properly computed. Since
156    the check is cheap (very small number of candidates
157    to be checked), the result is not cached for each float type.  */
158 
159 static bool
check_target_format(tree arg)160 check_target_format (tree arg)
161 {
162   tree type;
163   machine_mode mode;
164   const struct real_format *rfmt;
165 
166   type = TREE_TYPE (arg);
167   mode = TYPE_MODE (type);
168   rfmt = REAL_MODE_FORMAT (mode);
169   if ((mode == SFmode
170        && (rfmt == &ieee_single_format || rfmt == &mips_single_format
171 	   || rfmt == &motorola_single_format))
172       || (mode == DFmode
173 	  && (rfmt == &ieee_double_format || rfmt == &mips_double_format
174 	      || rfmt == &motorola_double_format))
175       /* For long double, we cannot really check XFmode
176          which is only defined on intel platforms.
177          Candidate pre-selection using builtin function
178          code guarantees that we are checking formats
179          for long double modes: double, quad, and extended.  */
180       || (mode != SFmode && mode != DFmode
181           && (rfmt == &ieee_quad_format
182 	      || rfmt == &mips_quad_format
183 	      || rfmt == &ieee_extended_motorola_format
184               || rfmt == &ieee_extended_intel_96_format
185               || rfmt == &ieee_extended_intel_128_format
186               || rfmt == &ieee_extended_intel_96_round_53_format)))
187     return true;
188 
189   return false;
190 }
191 
192 
193 /* A helper function to help select calls to pow that are suitable for
194    conditional DCE transformation.  It looks for pow calls that can be
195    guided with simple conditions.  Such calls either have constant base
196    values or base values converted from integers.  Returns true if
197    the pow call POW_CALL is a candidate.  */
198 
199 /* The maximum integer bit size for base argument of a pow call
200    that is suitable for shrink-wrapping transformation.  */
201 #define MAX_BASE_INT_BIT_SIZE 32
202 
203 static bool
check_pow(gcall * pow_call)204 check_pow (gcall *pow_call)
205 {
206   tree base, expn;
207   enum tree_code bc, ec;
208 
209   if (gimple_call_num_args (pow_call) != 2)
210     return false;
211 
212   base = gimple_call_arg (pow_call, 0);
213   expn = gimple_call_arg (pow_call, 1);
214 
215   if (!check_target_format (expn))
216     return false;
217 
218   bc = TREE_CODE (base);
219   ec = TREE_CODE (expn);
220 
221   /* Folding candidates are not interesting.
222      Can actually assert that it is already folded.  */
223   if (ec == REAL_CST && bc == REAL_CST)
224     return false;
225 
226   if (bc == REAL_CST)
227     {
228       /* Only handle a fixed range of constant.  */
229       REAL_VALUE_TYPE mv;
230       REAL_VALUE_TYPE bcv = TREE_REAL_CST (base);
231       if (real_equal (&bcv, &dconst1))
232         return false;
233       if (real_less (&bcv, &dconst1))
234         return false;
235       real_from_integer (&mv, TYPE_MODE (TREE_TYPE (base)), 256, UNSIGNED);
236       if (real_less (&mv, &bcv))
237         return false;
238       return true;
239     }
240   else if (bc == SSA_NAME)
241     {
242       tree base_val0, type;
243       gimple *base_def;
244       int bit_sz;
245 
246       /* Only handles cases where base value is converted
247          from integer values.  */
248       base_def = SSA_NAME_DEF_STMT (base);
249       if (gimple_code (base_def) != GIMPLE_ASSIGN)
250         return false;
251 
252       if (gimple_assign_rhs_code (base_def) != FLOAT_EXPR)
253         return false;
254       base_val0 = gimple_assign_rhs1 (base_def);
255 
256       type = TREE_TYPE (base_val0);
257       if (TREE_CODE (type) != INTEGER_TYPE)
258         return false;
259       bit_sz = TYPE_PRECISION (type);
260       /* If the type of the base is too wide,
261          the resulting shrink wrapping condition
262 	 will be too conservative.  */
263       if (bit_sz > MAX_BASE_INT_BIT_SIZE)
264         return false;
265 
266       return true;
267     }
268   else
269     return false;
270 }
271 
272 /* A helper function to help select candidate function calls that are
273    suitable for conditional DCE.  Candidate functions must have single
274    valid input domain in this implementation except for pow (see check_pow).
275    Returns true if the function call is a candidate.  */
276 
277 static bool
check_builtin_call(gcall * bcall)278 check_builtin_call (gcall *bcall)
279 {
280   tree arg;
281 
282   arg = gimple_call_arg (bcall, 0);
283   return check_target_format (arg);
284 }
285 
286 /* Return true if built-in function call CALL calls a math function
287    and if we know how to test the range of its arguments to detect _most_
288    situations in which errno is not set.  The test must err on the side
289    of treating non-erroneous values as potentially erroneous.  */
290 
291 static bool
can_test_argument_range(gcall * call)292 can_test_argument_range (gcall *call)
293 {
294   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
295     {
296     /* Trig functions.  */
297     CASE_FLT_FN (BUILT_IN_ACOS):
298     CASE_FLT_FN (BUILT_IN_ASIN):
299     /* Hyperbolic functions.  */
300     CASE_FLT_FN (BUILT_IN_ACOSH):
301     CASE_FLT_FN (BUILT_IN_ATANH):
302     CASE_FLT_FN (BUILT_IN_COSH):
303     CASE_FLT_FN (BUILT_IN_SINH):
304     /* Log functions.  */
305     CASE_FLT_FN (BUILT_IN_LOG):
306     CASE_FLT_FN (BUILT_IN_LOG2):
307     CASE_FLT_FN (BUILT_IN_LOG10):
308     CASE_FLT_FN (BUILT_IN_LOG1P):
309     /* Exp functions.  */
310     CASE_FLT_FN (BUILT_IN_EXP):
311     CASE_FLT_FN (BUILT_IN_EXP2):
312     CASE_FLT_FN (BUILT_IN_EXP10):
313     CASE_FLT_FN (BUILT_IN_EXPM1):
314     CASE_FLT_FN (BUILT_IN_POW10):
315     /* Sqrt.  */
316     CASE_FLT_FN (BUILT_IN_SQRT):
317     CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT):
318       return check_builtin_call (call);
319     /* Special one: two argument pow.  */
320     case BUILT_IN_POW:
321       return check_pow (call);
322     default:
323       break;
324     }
325 
326   return false;
327 }
328 
329 /* Return true if CALL can produce a domain error (EDOM) but can never
330    produce a pole, range overflow or range underflow error (all ERANGE).
331    This means that we can tell whether a function would have set errno
332    by testing whether the result is a NaN.  */
333 
334 static bool
edom_only_function(gcall * call)335 edom_only_function (gcall *call)
336 {
337   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
338     {
339     CASE_FLT_FN (BUILT_IN_ACOS):
340     CASE_FLT_FN (BUILT_IN_ASIN):
341     CASE_FLT_FN (BUILT_IN_ATAN):
342     CASE_FLT_FN (BUILT_IN_COS):
343     CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
344     CASE_FLT_FN (BUILT_IN_SIN):
345     CASE_FLT_FN (BUILT_IN_SQRT):
346     CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT):
347     CASE_FLT_FN (BUILT_IN_FMOD):
348     CASE_FLT_FN (BUILT_IN_REMAINDER):
349       return true;
350 
351     default:
352       return false;
353     }
354 }
355 
356 /* Return true if it is structurally possible to guard CALL.  */
357 
358 static bool
can_guard_call_p(gimple * call)359 can_guard_call_p (gimple *call)
360 {
361   return (!stmt_ends_bb_p (call)
362 	  || find_fallthru_edge (gimple_bb (call)->succs));
363 }
364 
365 /* For a comparison code return the comparison code we should use if we don't
366    HONOR_NANS.  */
367 
368 static enum tree_code
comparison_code_if_no_nans(tree_code code)369 comparison_code_if_no_nans (tree_code code)
370 {
371   switch (code)
372     {
373     case UNLT_EXPR:
374       return LT_EXPR;
375     case UNGT_EXPR:
376       return GT_EXPR;
377     case UNLE_EXPR:
378       return LE_EXPR;
379     case UNGE_EXPR:
380       return GE_EXPR;
381     case UNEQ_EXPR:
382       return EQ_EXPR;
383     case LTGT_EXPR:
384       return NE_EXPR;
385 
386     case LT_EXPR:
387     case GT_EXPR:
388     case LE_EXPR:
389     case GE_EXPR:
390     case EQ_EXPR:
391     case NE_EXPR:
392       return code;
393 
394     default:
395       gcc_unreachable ();
396     }
397 }
398 
399 /* A helper function to generate gimple statements for one bound
400    comparison, so that the built-in function is called whenever
401    TCODE <ARG, LBUB> is *false*.  TEMP_NAME1/TEMP_NAME2 are names
402    of the temporaries, CONDS is a vector holding the produced GIMPLE
403    statements, and NCONDS points to the variable holding the number of
404    logical comparisons.  CONDS is either empty or a list ended with a
405    null tree.  */
406 
407 static void
gen_one_condition(tree arg,int lbub,enum tree_code tcode,const char * temp_name1,const char * temp_name2,vec<gimple * > conds,unsigned * nconds)408 gen_one_condition (tree arg, int lbub,
409                    enum tree_code tcode,
410                    const char *temp_name1,
411 		   const char *temp_name2,
412 		   vec<gimple *> conds,
413                    unsigned *nconds)
414 {
415   if (!HONOR_NANS (arg))
416     tcode = comparison_code_if_no_nans (tcode);
417 
418   tree lbub_real_cst, lbub_cst, float_type;
419   tree temp, tempn, tempc, tempcn;
420   gassign *stmt1;
421   gassign *stmt2;
422   gcond *stmt3;
423 
424   float_type = TREE_TYPE (arg);
425   lbub_cst = build_int_cst (integer_type_node, lbub);
426   lbub_real_cst = build_real_from_int_cst (float_type, lbub_cst);
427 
428   temp = create_tmp_var (float_type, temp_name1);
429   stmt1 = gimple_build_assign (temp, arg);
430   tempn = make_ssa_name (temp, stmt1);
431   gimple_assign_set_lhs (stmt1, tempn);
432 
433   tempc = create_tmp_var (boolean_type_node, temp_name2);
434   stmt2 = gimple_build_assign (tempc,
435                                fold_build2 (tcode,
436 					    boolean_type_node,
437 					    tempn, lbub_real_cst));
438   tempcn = make_ssa_name (tempc, stmt2);
439   gimple_assign_set_lhs (stmt2, tempcn);
440 
441   stmt3 = gimple_build_cond_from_tree (tempcn, NULL_TREE, NULL_TREE);
442   conds.quick_push (stmt1);
443   conds.quick_push (stmt2);
444   conds.quick_push (stmt3);
445   (*nconds)++;
446 }
447 
448 /* A helper function to generate GIMPLE statements for
449    out of input domain check.  ARG is the call argument
450    to be runtime checked, DOMAIN holds the valid domain
451    for the given function, CONDS points to the vector
452    holding the result GIMPLE statements.  *NCONDS is
453    the number of logical comparisons.  This function
454    produces no more than two logical comparisons, one
455    for lower bound check, one for upper bound check.  */
456 
457 static void
gen_conditions_for_domain(tree arg,inp_domain domain,vec<gimple * > conds,unsigned * nconds)458 gen_conditions_for_domain (tree arg, inp_domain domain,
459 			   vec<gimple *> conds,
460                            unsigned *nconds)
461 {
462   if (domain.has_lb)
463     gen_one_condition (arg, domain.lb,
464                        (domain.is_lb_inclusive
465                         ? UNGE_EXPR : UNGT_EXPR),
466                        "DCE_COND_LB", "DCE_COND_LB_TEST",
467                        conds, nconds);
468 
469   if (domain.has_ub)
470     {
471       /* Now push a separator.  */
472       if (domain.has_lb)
473         conds.quick_push (NULL);
474 
475       gen_one_condition (arg, domain.ub,
476                          (domain.is_ub_inclusive
477                           ? UNLE_EXPR : UNLT_EXPR),
478                          "DCE_COND_UB", "DCE_COND_UB_TEST",
479                          conds, nconds);
480     }
481 }
482 
483 
484 /* A helper function to generate condition
485    code for the y argument in call pow (some_const, y).
486    See candidate selection in check_pow.  Since the
487    candidates' base values have a limited range,
488    the guarded code generated for y are simple:
489    if (__builtin_isgreater (y, max_y))
490      pow (const, y);
491    Note max_y can be computed separately for each
492    const base, but in this implementation, we
493    choose to compute it using the max base
494    in the allowed range for the purpose of
495    simplicity.  BASE is the constant base value,
496    EXPN is the expression for the exponent argument,
497    *CONDS is the vector to hold resulting statements,
498    and *NCONDS is the number of logical conditions.  */
499 
500 static void
gen_conditions_for_pow_cst_base(tree base,tree expn,vec<gimple * > conds,unsigned * nconds)501 gen_conditions_for_pow_cst_base (tree base, tree expn,
502 				 vec<gimple *> conds,
503                                  unsigned *nconds)
504 {
505   inp_domain exp_domain;
506   /* Validate the range of the base constant to make
507      sure it is consistent with check_pow.  */
508   REAL_VALUE_TYPE mv;
509   REAL_VALUE_TYPE bcv = TREE_REAL_CST (base);
510   gcc_assert (!real_equal (&bcv, &dconst1)
511               && !real_less (&bcv, &dconst1));
512   real_from_integer (&mv, TYPE_MODE (TREE_TYPE (base)), 256, UNSIGNED);
513   gcc_assert (!real_less (&mv, &bcv));
514 
515   exp_domain = get_domain (0, false, false,
516                            127, true, false);
517 
518   gen_conditions_for_domain (expn, exp_domain,
519                              conds, nconds);
520 }
521 
522 /* Generate error condition code for pow calls with
523    non constant base values.  The candidates selected
524    have their base argument value converted from
525    integer (see check_pow) value (1, 2, 4 bytes), and
526    the max exp value is computed based on the size
527    of the integer type (i.e. max possible base value).
528    The resulting input domain for exp argument is thus
529    conservative (smaller than the max value allowed by
530    the runtime value of the base).  BASE is the integer
531    base value, EXPN is the expression for the exponent
532    argument, *CONDS is the vector to hold resulting
533    statements, and *NCONDS is the number of logical
534    conditions.  */
535 
536 static void
gen_conditions_for_pow_int_base(tree base,tree expn,vec<gimple * > conds,unsigned * nconds)537 gen_conditions_for_pow_int_base (tree base, tree expn,
538 				 vec<gimple *> conds,
539                                  unsigned *nconds)
540 {
541   gimple *base_def;
542   tree base_val0;
543   tree int_type;
544   tree temp, tempn;
545   tree cst0;
546   gimple *stmt1, *stmt2;
547   int bit_sz, max_exp;
548   inp_domain exp_domain;
549 
550   base_def = SSA_NAME_DEF_STMT (base);
551   base_val0 = gimple_assign_rhs1 (base_def);
552   int_type = TREE_TYPE (base_val0);
553   bit_sz = TYPE_PRECISION (int_type);
554   gcc_assert (bit_sz > 0
555               && bit_sz <= MAX_BASE_INT_BIT_SIZE);
556 
557   /* Determine the max exp argument value according to
558      the size of the base integer.  The max exp value
559      is conservatively estimated assuming IEEE754 double
560      precision format.  */
561   if (bit_sz == 8)
562     max_exp = 128;
563   else if (bit_sz == 16)
564     max_exp = 64;
565   else
566     {
567       gcc_assert (bit_sz == MAX_BASE_INT_BIT_SIZE);
568       max_exp = 32;
569     }
570 
571   /* For pow ((double)x, y), generate the following conditions:
572      cond 1:
573      temp1 = x;
574      if (__builtin_islessequal (temp1, 0))
575 
576      cond 2:
577      temp2 = y;
578      if (__builtin_isgreater (temp2, max_exp_real_cst))  */
579 
580   /* Generate condition in reverse order -- first
581      the condition for the exp argument.  */
582 
583   exp_domain = get_domain (0, false, false,
584                            max_exp, true, true);
585 
586   gen_conditions_for_domain (expn, exp_domain,
587                              conds, nconds);
588 
589   /* Now generate condition for the base argument.
590      Note it does not use the helper function
591      gen_conditions_for_domain because the base
592      type is integer.  */
593 
594   /* Push a separator.  */
595   conds.quick_push (NULL);
596 
597   temp = create_tmp_var (int_type, "DCE_COND1");
598   cst0 = build_int_cst (int_type, 0);
599   stmt1 = gimple_build_assign (temp, base_val0);
600   tempn = make_ssa_name (temp, stmt1);
601   gimple_assign_set_lhs (stmt1, tempn);
602   stmt2 = gimple_build_cond (GT_EXPR, tempn, cst0, NULL_TREE, NULL_TREE);
603 
604   conds.quick_push (stmt1);
605   conds.quick_push (stmt2);
606   (*nconds)++;
607 }
608 
609 /* Method to generate conditional statements for guarding conditionally
610    dead calls to pow.  One or more statements can be generated for
611    each logical condition.  Statement groups of different conditions
612    are separated by a NULL tree and they are stored in the vec
613    conds.  The number of logical conditions are stored in *nconds.
614 
615    See C99 standard, 7.12.7.4:2, for description of pow (x, y).
616    The precise condition for domain errors are complex.  In this
617    implementation, a simplified (but conservative) valid domain
618    for x and y are used: x is positive to avoid dom errors, while
619    y is smaller than a upper bound (depending on x) to avoid range
620    errors.  Runtime code is generated to check x (if not constant)
621    and y against the valid domain.  If it is out, jump to the call,
622    otherwise the call is bypassed.  POW_CALL is the call statement,
623    *CONDS is a vector holding the resulting condition statements,
624    and *NCONDS is the number of logical conditions.  */
625 
626 static void
gen_conditions_for_pow(gcall * pow_call,vec<gimple * > conds,unsigned * nconds)627 gen_conditions_for_pow (gcall *pow_call, vec<gimple *> conds,
628                         unsigned *nconds)
629 {
630   tree base, expn;
631   enum tree_code bc;
632 
633   gcc_checking_assert (check_pow (pow_call));
634 
635   *nconds = 0;
636 
637   base = gimple_call_arg (pow_call, 0);
638   expn = gimple_call_arg (pow_call, 1);
639 
640   bc = TREE_CODE (base);
641 
642   if (bc == REAL_CST)
643     gen_conditions_for_pow_cst_base (base, expn, conds, nconds);
644   else if (bc == SSA_NAME)
645     gen_conditions_for_pow_int_base (base, expn, conds, nconds);
646   else
647     gcc_unreachable ();
648 }
649 
650 /* A helper routine to help computing the valid input domain
651    for a builtin function.  See C99 7.12.7 for details.  In this
652    implementation, we only handle single region domain.  The
653    resulting region can be conservative (smaller) than the actual
654    one and rounded to integers.  Some of the bounds are documented
655    in the standard, while other limit constants are computed
656    assuming IEEE floating point format (for SF and DF modes).
657    Since IEEE only sets minimum requirements for long double format,
658    different long double formats exist under different implementations
659    (e.g, 64 bit double precision (DF), 80 bit double-extended
660    precision (XF), and 128 bit quad precision (QF) ).  For simplicity,
661    in this implementation, the computed bounds for long double assume
662    64 bit format (DF), and are therefore conservative.  Another
663    assumption is that single precision float type is always SF mode,
664    and double type is DF mode.  This function is quite
665    implementation specific, so it may not be suitable to be part of
666    builtins.c.  This needs to be revisited later to see if it can
667    be leveraged in x87 assembly expansion.  */
668 
669 static inp_domain
get_no_error_domain(enum built_in_function fnc)670 get_no_error_domain (enum built_in_function fnc)
671 {
672   switch (fnc)
673     {
674     /* Trig functions: return [-1, +1]  */
675     CASE_FLT_FN (BUILT_IN_ACOS):
676     CASE_FLT_FN (BUILT_IN_ASIN):
677       return get_domain (-1, true, true,
678                          1, true, true);
679     /* Hyperbolic functions.  */
680     CASE_FLT_FN (BUILT_IN_ACOSH):
681       /* acosh: [1, +inf)  */
682       return get_domain (1, true, true,
683                          1, false, false);
684     CASE_FLT_FN (BUILT_IN_ATANH):
685       /* atanh: (-1, +1)  */
686       return get_domain (-1, true, false,
687                          1, true, false);
688     case BUILT_IN_COSHF:
689     case BUILT_IN_SINHF:
690       /* coshf: (-89, +89)  */
691       return get_domain (-89, true, false,
692                          89, true, false);
693     case BUILT_IN_COSH:
694     case BUILT_IN_SINH:
695     case BUILT_IN_COSHL:
696     case BUILT_IN_SINHL:
697       /* cosh: (-710, +710)  */
698       return get_domain (-710, true, false,
699                          710, true, false);
700     /* Log functions: (0, +inf)  */
701     CASE_FLT_FN (BUILT_IN_LOG):
702     CASE_FLT_FN (BUILT_IN_LOG2):
703     CASE_FLT_FN (BUILT_IN_LOG10):
704       return get_domain (0, true, false,
705                          0, false, false);
706     CASE_FLT_FN (BUILT_IN_LOG1P):
707       return get_domain (-1, true, false,
708                          0, false, false);
709     /* Exp functions.  */
710     case BUILT_IN_EXPF:
711     case BUILT_IN_EXPM1F:
712       /* expf: (-inf, 88)  */
713       return get_domain (-1, false, false,
714                          88, true, false);
715     case BUILT_IN_EXP:
716     case BUILT_IN_EXPM1:
717     case BUILT_IN_EXPL:
718     case BUILT_IN_EXPM1L:
719       /* exp: (-inf, 709)  */
720       return get_domain (-1, false, false,
721                          709, true, false);
722     case BUILT_IN_EXP2F:
723       /* exp2f: (-inf, 128)  */
724       return get_domain (-1, false, false,
725                          128, true, false);
726     case BUILT_IN_EXP2:
727     case BUILT_IN_EXP2L:
728       /* exp2: (-inf, 1024)  */
729       return get_domain (-1, false, false,
730                          1024, true, false);
731     case BUILT_IN_EXP10F:
732     case BUILT_IN_POW10F:
733       /* exp10f: (-inf, 38)  */
734       return get_domain (-1, false, false,
735                          38, true, false);
736     case BUILT_IN_EXP10:
737     case BUILT_IN_POW10:
738     case BUILT_IN_EXP10L:
739     case BUILT_IN_POW10L:
740       /* exp10: (-inf, 308)  */
741       return get_domain (-1, false, false,
742                          308, true, false);
743     /* sqrt: [0, +inf)  */
744     CASE_FLT_FN (BUILT_IN_SQRT):
745     CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT):
746       return get_domain (0, true, true,
747                          0, false, false);
748     default:
749       gcc_unreachable ();
750     }
751 
752   gcc_unreachable ();
753 }
754 
755 /* The function to generate shrink wrap conditions for a partially
756    dead builtin call whose return value is not used anywhere,
757    but has to be kept live due to potential error condition.
758    BI_CALL is the builtin call, CONDS is the vector of statements
759    for condition code, NCODES is the pointer to the number of
760    logical conditions.  Statements belonging to different logical
761    condition are separated by NULL tree in the vector.  */
762 
763 static void
gen_shrink_wrap_conditions(gcall * bi_call,vec<gimple * > conds,unsigned int * nconds)764 gen_shrink_wrap_conditions (gcall *bi_call, vec<gimple *> conds,
765                             unsigned int *nconds)
766 {
767   gcall *call;
768   tree fn;
769   enum built_in_function fnc;
770 
771   gcc_assert (nconds && conds.exists ());
772   gcc_assert (conds.length () == 0);
773   gcc_assert (is_gimple_call (bi_call));
774 
775   call = bi_call;
776   fn = gimple_call_fndecl (call);
777   gcc_assert (fn && fndecl_built_in_p (fn));
778   fnc = DECL_FUNCTION_CODE (fn);
779   *nconds = 0;
780 
781   if (fnc == BUILT_IN_POW)
782     gen_conditions_for_pow (call, conds, nconds);
783   else
784     {
785       tree arg;
786       inp_domain domain = get_no_error_domain (fnc);
787       *nconds = 0;
788       arg = gimple_call_arg (bi_call, 0);
789       gen_conditions_for_domain (arg, domain, conds, nconds);
790     }
791 
792   return;
793 }
794 
795 /* Shrink-wrap BI_CALL so that it is only called when one of the NCONDS
796    conditions in CONDS is false.  */
797 
798 static void
shrink_wrap_one_built_in_call_with_conds(gcall * bi_call,vec<gimple * > conds,unsigned int nconds)799 shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
800 					  unsigned int nconds)
801 {
802   gimple_stmt_iterator bi_call_bsi;
803   basic_block bi_call_bb, join_tgt_bb, guard_bb;
804   edge join_tgt_in_edge_from_call, join_tgt_in_edge_fall_thru;
805   edge bi_call_in_edge0, guard_bb_in_edge;
806   unsigned tn_cond_stmts;
807   unsigned ci;
808   gimple *cond_expr = NULL;
809   gimple *cond_expr_start;
810 
811   /* The cfg we want to create looks like this:
812 
813 	   [guard n-1]         <- guard_bb (old block)
814 	     |    \
815 	     | [guard n-2]                   }
816 	     |    / \                        }
817 	     |   /  ...                      } new blocks
818 	     |  /  [guard 0]                 }
819 	     | /    /   |                    }
820 	    [ call ]    |     <- bi_call_bb  }
821 	     | \        |
822 	     |  \       |
823 	     |   [ join ]     <- join_tgt_bb (old iff call must end bb)
824 	     |
825 	 possible EH edges (only if [join] is old)
826 
827      When [join] is new, the immediate dominators for these blocks are:
828 
829      1. [guard n-1]: unchanged
830      2. [call]: [guard n-1]
831      3. [guard m]: [guard m+1] for 0 <= m <= n-2
832      4. [join]: [guard n-1]
833 
     We punt for the more complex case of [join] being old and
835      simply free the dominance info.  We also punt on postdominators,
836      which aren't expected to be available at this point anyway.  */
837   bi_call_bb = gimple_bb (bi_call);
838 
839   /* Now find the join target bb -- split bi_call_bb if needed.  */
840   if (stmt_ends_bb_p (bi_call))
841     {
842       /* We checked that there was a fallthrough edge in
843 	 can_guard_call_p.  */
844       join_tgt_in_edge_from_call = find_fallthru_edge (bi_call_bb->succs);
845       gcc_assert (join_tgt_in_edge_from_call);
846       /* We don't want to handle PHIs.  */
847       if (EDGE_COUNT (join_tgt_in_edge_from_call->dest->preds) > 1)
848 	join_tgt_bb = split_edge (join_tgt_in_edge_from_call);
849       else
850 	{
851 	  join_tgt_bb = join_tgt_in_edge_from_call->dest;
852 	  /* We may have degenerate PHIs in the destination.  Propagate
853 	     those out.  */
854 	  for (gphi_iterator i = gsi_start_phis (join_tgt_bb); !gsi_end_p (i);)
855 	    {
856 	      gphi *phi = i.phi ();
857 	      replace_uses_by (gimple_phi_result (phi),
858 			       gimple_phi_arg_def (phi, 0));
859 	      remove_phi_node (&i, true);
860 	    }
861 	}
862     }
863   else
864     {
865       join_tgt_in_edge_from_call = split_block (bi_call_bb, bi_call);
866       join_tgt_bb = join_tgt_in_edge_from_call->dest;
867     }
868 
869   bi_call_bsi = gsi_for_stmt (bi_call);
870 
871   /* Now it is time to insert the first conditional expression
872      into bi_call_bb and split this bb so that bi_call is
873      shrink-wrapped.  */
874   tn_cond_stmts = conds.length ();
875   cond_expr = NULL;
876   cond_expr_start = conds[0];
877   for (ci = 0; ci < tn_cond_stmts; ci++)
878     {
879       gimple *c = conds[ci];
880       gcc_assert (c || ci != 0);
881       if (!c)
882         break;
883       gsi_insert_before (&bi_call_bsi, c, GSI_SAME_STMT);
884       cond_expr = c;
885     }
886   ci++;
887   gcc_assert (cond_expr && gimple_code (cond_expr) == GIMPLE_COND);
888 
889   typedef std::pair<edge, edge> edge_pair;
890   auto_vec<edge_pair, 8> edges;
891 
892   bi_call_in_edge0 = split_block (bi_call_bb, cond_expr);
893   bi_call_in_edge0->flags &= ~EDGE_FALLTHRU;
894   bi_call_in_edge0->flags |= EDGE_FALSE_VALUE;
895   guard_bb = bi_call_bb;
896   bi_call_bb = bi_call_in_edge0->dest;
897   join_tgt_in_edge_fall_thru = make_edge (guard_bb, join_tgt_bb,
898                                           EDGE_TRUE_VALUE);
899 
900   edges.reserve (nconds);
901   edges.quick_push (edge_pair (bi_call_in_edge0, join_tgt_in_edge_fall_thru));
902 
903   /* Code generation for the rest of the conditions  */
904   for (unsigned int i = 1; i < nconds; ++i)
905     {
906       unsigned ci0;
907       edge bi_call_in_edge;
908       gimple_stmt_iterator guard_bsi = gsi_for_stmt (cond_expr_start);
909       ci0 = ci;
910       cond_expr_start = conds[ci0];
911       for (; ci < tn_cond_stmts; ci++)
912         {
913 	  gimple *c = conds[ci];
914           gcc_assert (c || ci != ci0);
915           if (!c)
916             break;
917           gsi_insert_before (&guard_bsi, c, GSI_SAME_STMT);
918           cond_expr = c;
919         }
920       ci++;
921       gcc_assert (cond_expr && gimple_code (cond_expr) == GIMPLE_COND);
922       guard_bb_in_edge = split_block (guard_bb, cond_expr);
923       guard_bb_in_edge->flags &= ~EDGE_FALLTHRU;
924       guard_bb_in_edge->flags |= EDGE_TRUE_VALUE;
925 
926       bi_call_in_edge = make_edge (guard_bb, bi_call_bb, EDGE_FALSE_VALUE);
927       edges.quick_push (edge_pair (bi_call_in_edge, guard_bb_in_edge));
928     }
929 
930   /* Now update the probability and profile information, processing the
931      guards in order of execution.
932 
933      There are two approaches we could take here.  On the one hand we
934      could assign a probability of X to the call block and distribute
935      that probability among its incoming edges.  On the other hand we
936      could assign a probability of X to each individual call edge.
937 
938      The choice only affects calls that have more than one condition.
939      In those cases, the second approach would give the call block
940      a greater probability than the first.  However, the difference
941      is only small, and our chosen X is a pure guess anyway.
942 
943      Here we take the second approach because it's slightly simpler
944      and because it's easy to see that it doesn't lose profile counts.  */
945   bi_call_bb->count = profile_count::zero ();
946   while (!edges.is_empty ())
947     {
948       edge_pair e = edges.pop ();
949       edge call_edge = e.first;
950       edge nocall_edge = e.second;
951       basic_block src_bb = call_edge->src;
952       gcc_assert (src_bb == nocall_edge->src);
953 
954       call_edge->probability = profile_probability::very_unlikely ();
955       nocall_edge->probability = profile_probability::always ()
956 				 - call_edge->probability;
957 
958       bi_call_bb->count += call_edge->count ();
959 
960       if (nocall_edge->dest != join_tgt_bb)
961 	nocall_edge->dest->count = src_bb->count - bi_call_bb->count;
962     }
963 
964   if (dom_info_available_p (CDI_DOMINATORS))
965     {
966       /* The split_blocks leave [guard 0] as the immediate dominator
967 	 of [call] and [call] as the immediate dominator of [join].
968 	 Fix them up.  */
969       set_immediate_dominator (CDI_DOMINATORS, bi_call_bb, guard_bb);
970       set_immediate_dominator (CDI_DOMINATORS, join_tgt_bb, guard_bb);
971     }
972 
973   if (dump_file && (dump_flags & TDF_DETAILS))
974     {
975       location_t loc;
976       loc = gimple_location (bi_call);
977       fprintf (dump_file,
978                "%s:%d: note: function call is shrink-wrapped"
979                " into error conditions.\n",
980                LOCATION_FILE (loc), LOCATION_LINE (loc));
981     }
982 }
983 
984 /* Shrink-wrap BI_CALL so that it is only called when it might set errno
985    (but is always called if it would set errno).  */
986 
987 static void
shrink_wrap_one_built_in_call(gcall * bi_call)988 shrink_wrap_one_built_in_call (gcall *bi_call)
989 {
990   unsigned nconds = 0;
991   auto_vec<gimple *, 12> conds;
992   gen_shrink_wrap_conditions (bi_call, conds, &nconds);
993   gcc_assert (nconds != 0);
994   shrink_wrap_one_built_in_call_with_conds (bi_call, conds, nconds);
995 }
996 
997 /* Return true if built-in function call CALL could be implemented using
998    a combination of an internal function to compute the result and a
999    separate call to set errno.  */
1000 
1001 static bool
can_use_internal_fn(gcall * call)1002 can_use_internal_fn (gcall *call)
1003 {
1004   /* Only replace calls that set errno.  */
1005   if (!gimple_vdef (call))
1006     return false;
1007 
1008   /* See whether there is an internal function for this built-in.  */
1009   if (replacement_internal_fn (call) == IFN_LAST)
1010     return false;
1011 
1012   /* See whether we can catch all cases where errno would be set,
1013      while still avoiding the call in most cases.  */
1014   if (!can_test_argument_range (call)
1015       && !edom_only_function (call))
1016     return false;
1017 
1018   return true;
1019 }
1020 
1021 /* Implement built-in function call CALL using an internal function.  */
1022 
1023 static void
use_internal_fn(gcall * call)1024 use_internal_fn (gcall *call)
1025 {
1026   /* We'll be inserting another call with the same arguments after the
1027      lhs has been set, so prevent any possible coalescing failure from
1028      having both values live at once.  See PR 71020.  */
1029   replace_abnormal_ssa_names (call);
1030 
1031   unsigned nconds = 0;
1032   auto_vec<gimple *, 12> conds;
1033   if (can_test_argument_range (call))
1034     {
1035       gen_shrink_wrap_conditions (call, conds, &nconds);
1036       gcc_assert (nconds != 0);
1037     }
1038   else
1039     gcc_assert (edom_only_function (call));
1040 
1041   internal_fn ifn = replacement_internal_fn (call);
1042   gcc_assert (ifn != IFN_LAST);
1043 
1044   /* Construct the new call, with the same arguments as the original one.  */
1045   auto_vec <tree, 16> args;
1046   unsigned int nargs = gimple_call_num_args (call);
1047   for (unsigned int i = 0; i < nargs; ++i)
1048     args.safe_push (gimple_call_arg (call, i));
1049   gcall *new_call = gimple_build_call_internal_vec (ifn, args);
1050   gimple_set_location (new_call, gimple_location (call));
1051   gimple_call_set_nothrow (new_call, gimple_call_nothrow_p (call));
1052 
1053   /* Transfer the LHS to the new call.  */
1054   tree lhs = gimple_call_lhs (call);
1055   gimple_call_set_lhs (new_call, lhs);
1056   gimple_call_set_lhs (call, NULL_TREE);
1057   SSA_NAME_DEF_STMT (lhs) = new_call;
1058 
1059   /* Insert the new call.  */
1060   gimple_stmt_iterator gsi = gsi_for_stmt (call);
1061   gsi_insert_before (&gsi, new_call, GSI_SAME_STMT);
1062 
1063   if (nconds == 0)
1064     {
1065       /* Skip the call if LHS == LHS.  If we reach here, EDOM is the only
1066 	 valid errno value and it is used iff the result is NaN.  */
1067       conds.quick_push (gimple_build_cond (EQ_EXPR, lhs, lhs,
1068 					   NULL_TREE, NULL_TREE));
1069       nconds++;
1070 
1071       /* Try replacing the original call with a direct assignment to
1072 	 errno, via an internal function.  */
1073       if (set_edom_supported_p () && !stmt_ends_bb_p (call))
1074 	{
1075 	  gimple_stmt_iterator gsi = gsi_for_stmt (call);
1076 	  gcall *new_call = gimple_build_call_internal (IFN_SET_EDOM, 0);
1077 	  gimple_set_vuse (new_call, gimple_vuse (call));
1078 	  gimple_set_vdef (new_call, gimple_vdef (call));
1079 	  SSA_NAME_DEF_STMT (gimple_vdef (new_call)) = new_call;
1080 	  gimple_set_location (new_call, gimple_location (call));
1081 	  gsi_replace (&gsi, new_call, false);
1082 	  call = new_call;
1083 	}
1084     }
1085 
1086   shrink_wrap_one_built_in_call_with_conds (call, conds, nconds);
1087 }
1088 
1089 /* The top level function for conditional dead code shrink
1090    wrapping transformation.  */
1091 
1092 static void
shrink_wrap_conditional_dead_built_in_calls(vec<gcall * > calls)1093 shrink_wrap_conditional_dead_built_in_calls (vec<gcall *> calls)
1094 {
1095   unsigned i = 0;
1096 
1097   unsigned n = calls.length ();
1098   for (; i < n ; i++)
1099     {
1100       gcall *bi_call = calls[i];
1101       if (gimple_call_lhs (bi_call))
1102 	use_internal_fn (bi_call);
1103       else
1104 	shrink_wrap_one_built_in_call (bi_call);
1105     }
1106 }
1107 
1108 namespace {
1109 
/* Static descriptor for the conditional dead call elimination pass.  The
   initializers are positional; the trailing comment on each line names
   the field being filled in.  The pass is a GIMPLE pass named "cdce" that
   requires the CFG and SSA form and requests no todo flags here.  */
const pass_data pass_data_call_cdce =
{
  GIMPLE_PASS, /* type */
  "cdce", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_CALL_CDCE, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1122 
1123 class pass_call_cdce : public gimple_opt_pass
1124 {
1125 public:
pass_call_cdce(gcc::context * ctxt)1126   pass_call_cdce (gcc::context *ctxt)
1127     : gimple_opt_pass (pass_data_call_cdce, ctxt)
1128   {}
1129 
1130   /* opt_pass methods: */
gate(function *)1131   virtual bool gate (function *)
1132     {
1133       /* The limit constants used in the implementation
1134 	 assume IEEE floating point format.  Other formats
1135 	 can be supported in the future if needed.  */
1136       return flag_tree_builtin_call_dce != 0;
1137     }
1138 
1139   virtual unsigned int execute (function *);
1140 
1141 }; // class pass_call_cdce
1142 
1143 unsigned int
execute(function * fun)1144 pass_call_cdce::execute (function *fun)
1145 {
1146   basic_block bb;
1147   gimple_stmt_iterator i;
1148   auto_vec<gcall *> cond_dead_built_in_calls;
1149   FOR_EACH_BB_FN (bb, fun)
1150     {
1151       /* Skip blocks that are being optimized for size, since our
1152 	 transformation always increases code size.  */
1153       if (optimize_bb_for_size_p (bb))
1154 	continue;
1155 
1156       /* Collect dead call candidates.  */
1157       for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
1158         {
1159 	  gcall *stmt = dyn_cast <gcall *> (gsi_stmt (i));
1160           if (stmt
1161 	      && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
1162 	      && (gimple_call_lhs (stmt)
1163 		  ? can_use_internal_fn (stmt)
1164 		  : can_test_argument_range (stmt))
1165 	      && can_guard_call_p (stmt))
1166             {
1167               if (dump_file && (dump_flags & TDF_DETAILS))
1168                 {
1169                   fprintf (dump_file, "Found conditional dead call: ");
1170                   print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1171                   fprintf (dump_file, "\n");
1172                 }
1173 	      if (!cond_dead_built_in_calls.exists ())
1174 		cond_dead_built_in_calls.create (64);
1175 	      cond_dead_built_in_calls.safe_push (stmt);
1176             }
1177 	}
1178     }
1179 
1180   if (!cond_dead_built_in_calls.exists ())
1181     return 0;
1182 
1183   shrink_wrap_conditional_dead_built_in_calls (cond_dead_built_in_calls);
1184   free_dominance_info (CDI_POST_DOMINATORS);
1185   /* As we introduced new control-flow we need to insert PHI-nodes
1186      for the call-clobbers of the remaining call.  */
1187   mark_virtual_operands_for_renaming (fun);
1188   return TODO_update_ssa;
1189 }
1190 
1191 } // anon namespace
1192 
1193 gimple_opt_pass *
make_pass_call_cdce(gcc::context * ctxt)1194 make_pass_call_cdce (gcc::context *ctxt)
1195 {
1196   return new pass_call_cdce (ctxt);
1197 }
1198