1 /* Tail call optimization on trees.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10 
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "cgraph.h"
31 #include "gimple-pretty-print.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-into-ssa.h"
38 #include "tree-dfa.h"
39 #include "except.h"
40 #include "tree-eh.h"
41 #include "dbgcnt.h"
42 #include "cfgloop.h"
43 #include "common/common-target.h"
44 #include "ipa-utils.h"
45 #include "tree-ssa-live.h"
46 
47 /* This file implements tail recursion elimination.  It is also used to
48    analyze tail calls in general, passing the results to the rtl level
49    where they are used for sibcall optimization.
50 
51    In addition to the standard tail recursion elimination, we handle the most
52    trivial cases of making the call tail recursive by creating accumulators.
53    For example, the following function
54 
55    int sum (int n)
56    {
57      if (n > 0)
58        return n + sum (n - 1);
59      else
60        return 0;
61    }
62 
63    is transformed into
64 
65    int sum (int n)
66    {
67      int acc = 0;
68 
69      while (n > 0)
70        acc += n--;
71 
72      return acc;
73    }
74 
75    To do this, we maintain two accumulators (a_acc and m_acc) indicating that
76    when we reach a return x statement, we should return a_acc + x * m_acc
77    instead.  They are initialized to 0 and 1, respectively,
78    so the semantics of the function are obviously preserved.  If we are
79    guaranteed that the value of an accumulator never changes, we
80    omit that accumulator.
81 
82    There are three ways in which the function may exit.  The first one is
83    handled in adjust_return_value, the other two in adjust_accumulator_values
84    (the second case is actually a special case of the third one and we
85    present it separately just for clarity):
86 
87    1) Just return x, where x is not in any of the remaining special shapes.
88       We rewrite this to a gimple equivalent of return m_acc * x + a_acc.
89 
90    2) return f (...), where f is the current function, is rewritten in a
91       classical tail-recursion elimination way, into assignment of arguments
92       and jump to the start of the function.  Values of the accumulators
93       are unchanged.
94 
95    3) return a + m * f(...), where a and m do not depend on call to f.
96       To preserve the semantics described before we want this to be rewritten
97       in such a way that we finally return
98 
99       a_acc + (a + m * f(...)) * m_acc = (a_acc + a * m_acc) + (m * m_acc) * f(...).
100 
101       I.e. we increase a_acc by a * m_acc, multiply m_acc by m and
102       eliminate the tail call to f.  Special cases when the value is just
103       added or just multiplied are obtained by setting a = 0 or m = 1.
104 
105    TODO -- it is possible to do similar tricks for other operations.  */
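
/* As a concrete illustration of case 3 above (a hypothetical example, not
   part of the original sources):

   int fac (int n)
   {
     if (n < 2)
       return 1;
     return n * fac (n - 1);
   }

   matches case 3 with a = 0 and m = n, so only the multiplicative
   accumulator is needed.  Each eliminated recursive call roughly performs
   m_acc *= n, and the terminating "return 1" is rewritten per case 1 into
   "return 1 * m_acc + a_acc" (with a_acc omitted, since it never changes),
   turning the recursion into a loop that accumulates the product.  */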
106 
107 /* A structure that describes the tailcall.  */
108 
109 struct tailcall
110 {
111   /* The iterator pointing to the call statement.  */
112   gimple_stmt_iterator call_gsi;
113 
114   /* True if it is a call to the current function.  */
115   bool tail_recursion;
116 
117   /* The return value of the caller is mult * f + add, where f is the return
118      value of the call.  */
119   tree mult, add;
120 
121   /* Next tailcall in the chain.  */
122   struct tailcall *next;
123 };
124 
125 /* The variables holding the value of multiplicative and additive
126    accumulator.  */
127 static tree m_acc, a_acc;
128 
129 /* Bitmap with a bit for each function parameter which is set to true if we
130    have to copy the parameter for conversion of tail-recursive calls.  */
131 
132 static bitmap tailr_arg_needs_copy;
133 
134 static bool optimize_tail_call (struct tailcall *, bool);
135 static void eliminate_tail_call (struct tailcall *);
136 
137 /* Returns false when the function is not suitable for tail call optimization
138    for some reason (e.g. if it takes a variable number of arguments).  */
139 
140 static bool
141 suitable_for_tail_opt_p (void)
142 {
143   if (cfun->stdarg)
144     return false;
145 
146   return true;
147 }
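
/* For instance (a hypothetical example, not from the original sources), a
   variadic function such as

     int sumv (int n, ...) { ... return n + sumv (n - 1, 0); }

   is rejected here outright because cfun->stdarg is set, independently of
   what the tail call itself looks like.  */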
148 
149 /* Returns false when the function is not suitable for tail call optimization
150    for some reason (e.g. if it takes a variable number of arguments).
151    This test must pass in addition to suitable_for_tail_opt_p in order to make
152    tail call discovery happen.  */
153 
154 static bool
155 suitable_for_tail_call_opt_p (void)
156 {
157   tree param;
158 
159   /* alloca (until we have stack slot life analysis) inhibits
160      sibling call optimizations, but not tail recursion.  */
161   if (cfun->calls_alloca)
162     return false;
163 
164   /* If we are using sjlj exceptions, we may need to add a call to
165      _Unwind_SjLj_Unregister at exit of the function.  Which means
166      that we cannot do any sibcall transformations.  */
167   if (targetm_common.except_unwind_info (&global_options) == UI_SJLJ
168       && current_function_has_exception_handlers ())
169     return false;
170 
171   /* Any function that calls setjmp might have longjmp called from
172      any called function.  ??? We really should represent this
173      properly in the CFG so that this needn't be special cased.  */
174   if (cfun->calls_setjmp)
175     return false;
176 
177   /* Various targets don't handle tail calls correctly in functions
178      that call __builtin_eh_return.  */
179   if (cfun->calls_eh_return)
180     return false;
181 
182   /* ??? It is OK if the address of an argument of a function is taken in
183      some cases, but not in all cases.  See PR15387 and PR19616.  Revisit for 4.1.  */
184   for (param = DECL_ARGUMENTS (current_function_decl);
185        param;
186        param = DECL_CHAIN (param))
187     if (TREE_ADDRESSABLE (param))
188       return false;
189 
190   return true;
191 }
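
/* For instance (a hypothetical example, not from the original sources),

     int f (int n) { g (&n); return h (n); }

   takes the address of parameter N, so TREE_ADDRESSABLE (n) is set and the
   check above rejects sibcall optimization for the whole function, even
   though plain tail recursion elimination would still be permitted by
   suitable_for_tail_opt_p.  */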
192 
193 /* Checks whether the expression EXPR in stmt AT is independent of the
194    statement pointed to by GSI (in the sense that we already know EXPR's value
195    at GSI).  We use the fact that we are only called from the chain of
196    basic blocks that each have only a single successor.  Returns the expression
197    containing the value of EXPR at GSI.  */
198 
199 static tree
200 independent_of_stmt_p (tree expr, gimple *at, gimple_stmt_iterator gsi,
201 		       bitmap to_move)
202 {
203   basic_block bb, call_bb, at_bb;
204   edge e;
205   edge_iterator ei;
206 
207   if (is_gimple_min_invariant (expr))
208     return expr;
209 
210   if (TREE_CODE (expr) != SSA_NAME)
211     return NULL_TREE;
212 
213   if (bitmap_bit_p (to_move, SSA_NAME_VERSION (expr)))
214     return expr;
215 
216   /* Mark the blocks in the chain leading to the end.  */
217   at_bb = gimple_bb (at);
218   call_bb = gimple_bb (gsi_stmt (gsi));
219   for (bb = call_bb; bb != at_bb; bb = single_succ (bb))
220     bb->aux = &bb->aux;
221   bb->aux = &bb->aux;
222 
223   while (1)
224     {
225       at = SSA_NAME_DEF_STMT (expr);
226       bb = gimple_bb (at);
227 
228       /* The default definition or defined before the chain.  */
229       if (!bb || !bb->aux)
230 	break;
231 
232       if (bb == call_bb)
233 	{
234 	  for (; !gsi_end_p (gsi); gsi_next (&gsi))
235 	    if (gsi_stmt (gsi) == at)
236 	      break;
237 
238 	  if (!gsi_end_p (gsi))
239 	    expr = NULL_TREE;
240 	  break;
241 	}
242 
243       if (gimple_code (at) != GIMPLE_PHI)
244 	{
245 	  expr = NULL_TREE;
246 	  break;
247 	}
248 
249       FOR_EACH_EDGE (e, ei, bb->preds)
250 	if (e->src->aux)
251 	  break;
252       gcc_assert (e);
253 
254       expr = PHI_ARG_DEF_FROM_EDGE (at, e);
255       if (TREE_CODE (expr) != SSA_NAME)
256 	{
257 	  /* The value is a constant.  */
258 	  break;
259 	}
260     }
261 
262   /* Unmark the blocks.  */
263   for (bb = call_bb; bb != at_bb; bb = single_succ (bb))
264     bb->aux = NULL;
265   bb->aux = NULL;
266 
267   return expr;
268 }
269 
270 enum par { FAIL, OK, TRY_MOVE };
271 
272 /* Simulates the effect of an assignment STMT on the return value of the tail
273    recursive CALL, which is passed in ASS_VAR.  M and A are the multiplicative
274    and additive factors for the real return value.  */
275 
276 static par
277 process_assignment (gassign *stmt,
278 		    gimple_stmt_iterator call, tree *m,
279 		    tree *a, tree *ass_var, bitmap to_move)
280 {
281   tree op0, op1 = NULL_TREE, non_ass_var = NULL_TREE;
282   tree dest = gimple_assign_lhs (stmt);
283   enum tree_code code = gimple_assign_rhs_code (stmt);
284   enum gimple_rhs_class rhs_class = get_gimple_rhs_class (code);
285   tree src_var = gimple_assign_rhs1 (stmt);
286 
287   /* See if this is a simple copy operation of an SSA name to the function
288      result.  In that case we may have a simple tail call.  Ignore type
289      conversions that can never produce extra code between the function
290      call and the function return.  */
291   if ((rhs_class == GIMPLE_SINGLE_RHS || gimple_assign_cast_p (stmt))
292       && src_var == *ass_var)
293     {
294       /* Reject a tailcall if the type conversion might need
295 	 additional code.  */
296       if (gimple_assign_cast_p (stmt))
297 	{
298 	  if (TYPE_MODE (TREE_TYPE (dest)) != TYPE_MODE (TREE_TYPE (src_var)))
299 	    return FAIL;
300 
301 	  /* Even if the type modes are the same, if the precision of the
302 	     type is smaller than mode's precision,
303 	     reduce_to_bit_field_precision would generate additional code.  */
304 	  if (INTEGRAL_TYPE_P (TREE_TYPE (dest))
305 	      && !type_has_mode_precision_p (TREE_TYPE (dest)))
306 	    return FAIL;
307 	}
308 
309       *ass_var = dest;
310       return OK;
311     }
312 
313   switch (rhs_class)
314     {
315     case GIMPLE_BINARY_RHS:
316       op1 = gimple_assign_rhs2 (stmt);
317 
318       /* Fall through.  */
319 
320     case GIMPLE_UNARY_RHS:
321       op0 = gimple_assign_rhs1 (stmt);
322       break;
323 
324     default:
325       return FAIL;
326     }
327 
328   /* Accumulator optimizations will reverse the order of operations.
329      We can only do that for floating-point types if we're assuming
330      that addition and multiplication are associative.  */
331   if (!flag_associative_math)
332     if (FLOAT_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl))))
333       return FAIL;
334 
335   if (rhs_class == GIMPLE_UNARY_RHS
336       && op0 == *ass_var)
337     ;
338   else if (op0 == *ass_var
339 	   && (non_ass_var = independent_of_stmt_p (op1, stmt, call,
340 						    to_move)))
341     ;
342   else if (*ass_var
343 	   && op1 == *ass_var
344 	   && (non_ass_var = independent_of_stmt_p (op0, stmt, call,
345 						    to_move)))
346     ;
347   else
348     return TRY_MOVE;
349 
350   switch (code)
351     {
352     case PLUS_EXPR:
353       *a = non_ass_var;
354       *ass_var = dest;
355       return OK;
356 
357     case POINTER_PLUS_EXPR:
358       if (op0 != *ass_var)
359 	return FAIL;
360       *a = non_ass_var;
361       *ass_var = dest;
362       return OK;
363 
364     case MULT_EXPR:
365       *m = non_ass_var;
366       *ass_var = dest;
367       return OK;
368 
369     case NEGATE_EXPR:
370       *m = build_minus_one_cst (TREE_TYPE (op0));
371       *ass_var = dest;
372       return OK;
373 
374     case MINUS_EXPR:
375       if (*ass_var == op0)
376         *a = fold_build1 (NEGATE_EXPR, TREE_TYPE (non_ass_var), non_ass_var);
377       else
378         {
379 	  *m = build_minus_one_cst (TREE_TYPE (non_ass_var));
380           *a = fold_build1 (NEGATE_EXPR, TREE_TYPE (non_ass_var), non_ass_var);
381         }
382 
383       *ass_var = dest;
384       return OK;
385 
386     default:
387       return FAIL;
388     }
389 }
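
/* For illustration (a hypothetical GIMPLE shape, not from the original
   sources), a source-level "return n + sum (n - 1);" is roughly

     _4 = n_2 - 1;
     tmp_1 = sum (_4);
     res_3 = n_2 + tmp_1;
     return res_3;

   process_assignment is called on the PLUS_EXPR assignment with
   *ass_var == tmp_1; it sees op1 == *ass_var, checks via
   independent_of_stmt_p that n_2 is already known at the call, and records
   *a = n_2 and *ass_var = res_3, which later feeds the additive
   accumulator.  */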
390 
391 /* Propagate VAR through phis on edge E.  */
392 
393 static tree
394 propagate_through_phis (tree var, edge e)
395 {
396   basic_block dest = e->dest;
397   gphi_iterator gsi;
398 
399   for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
400     {
401       gphi *phi = gsi.phi ();
402       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == var)
403         return PHI_RESULT (phi);
404     }
405   return var;
406 }
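
/* For illustration (a hypothetical example, not from the original sources):
   if the destination of edge E starts with

     x_3 = PHI <x_1 (E), x_2 (other edge)>

   then propagate_through_phis (x_1, E) returns x_3, so the call's result
   keeps being tracked under its new SSA name across the block boundary.  */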
407 
408 /* Argument for compute_live_vars/live_vars_at_stmt and what compute_live_vars
409    returns.  Computed lazily, but just once for the function.  */
410 static live_vars_map *live_vars;
411 static vec<bitmap_head> live_vars_vec;
412 
413 /* Finds tailcalls falling into basic block BB. The list of found tailcalls is
414    added to the start of RET.  */
415 
416 static void
417 find_tail_calls (basic_block bb, struct tailcall **ret)
418 {
419   tree ass_var = NULL_TREE, ret_var, func, param;
420   gimple *stmt;
421   gcall *call = NULL;
422   gimple_stmt_iterator gsi, agsi;
423   bool tail_recursion;
424   struct tailcall *nw;
425   edge e;
426   tree m, a;
427   basic_block abb;
428   size_t idx;
429   tree var;
430 
431   if (!single_succ_p (bb))
432     return;
433 
434   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
435     {
436       stmt = gsi_stmt (gsi);
437 
438       /* Ignore labels, returns, nops, clobbers and debug stmts.  */
439       if (gimple_code (stmt) == GIMPLE_LABEL
440 	  || gimple_code (stmt) == GIMPLE_RETURN
441 	  || gimple_code (stmt) == GIMPLE_NOP
442 	  || gimple_code (stmt) == GIMPLE_PREDICT
443 	  || gimple_clobber_p (stmt)
444 	  || is_gimple_debug (stmt))
445 	continue;
446 
447       /* Check for a call.  */
448       if (is_gimple_call (stmt))
449 	{
450 	  call = as_a <gcall *> (stmt);
451 	  ass_var = gimple_call_lhs (call);
452 	  break;
453 	}
454 
455       /* Allow simple copies between local variables, even if they're
456 	 aggregates.  */
457       if (is_gimple_assign (stmt)
458 	  && auto_var_in_fn_p (gimple_assign_lhs (stmt), cfun->decl)
459 	  && auto_var_in_fn_p (gimple_assign_rhs1 (stmt), cfun->decl))
460 	continue;
461 
462       /* If the statement references memory or volatile operands, fail.  */
463       if (gimple_references_memory_p (stmt)
464 	  || gimple_has_volatile_ops (stmt))
465 	return;
466     }
467 
468   if (gsi_end_p (gsi))
469     {
470       edge_iterator ei;
471       /* Recurse to the predecessors.  */
472       FOR_EACH_EDGE (e, ei, bb->preds)
473 	find_tail_calls (e->src, ret);
474 
475       return;
476     }
477 
478   /* If the LHS of our call is not just a simple register or local
479      variable, we can't transform this into a tail or sibling call.
480      This situation happens, in (e.g.) "*p = foo()" where foo returns a
481      struct.  In this case we won't have a temporary here, but we need
482      to carry out the side effect anyway, so tailcall is impossible.
483 
484      ??? In some situations (when the struct is returned in memory via
485      invisible argument) we could deal with this, e.g. by passing 'p'
486      itself as that argument to foo, but it's too early to do this here,
487      and expand_call() will not handle it anyway.  If it ever can, then
488      we need to revisit this here, to allow that situation.  */
489   if (ass_var
490       && !is_gimple_reg (ass_var)
491       && !auto_var_in_fn_p (ass_var, cfun->decl))
492     return;
493 
494   /* If the call might throw an exception that wouldn't propagate out of
495      cfun, we can't transform to a tail or sibling call (82081).  */
496   if (stmt_could_throw_p (cfun, stmt)
497       && !stmt_can_throw_external (cfun, stmt))
498     return;
499 
500   /* If the function returns a value, then at present, the tail call
501      must return the same type of value.  There is conceptually a copy
502      between the object returned by the tail call candidate and the
503      object returned by CFUN itself.
504 
505      This means that if we have:
506 
507 	 lhs = f (&<retval>);    // f reads from <retval>
508 				 // (lhs is usually also <retval>)
509 
510      there is a copy between the temporary object returned by f and lhs,
511      meaning that any use of <retval> in f occurs before the assignment
512      to lhs begins.  Thus the <retval> that is live on entry to the call
513      to f is really an independent local variable V that happens to be
514      stored in the RESULT_DECL rather than a local VAR_DECL.
515 
516      Turning this into a tail call would remove the copy and make the
517      lifetimes of the return value and V overlap.  The same applies to
518      tail recursion, since if f can read from <retval>, we have to assume
519      that CFUN might already have written to <retval> before the call.
520 
521      The problem doesn't apply when <retval> is passed by value, but that
522      isn't a case we handle anyway.  */
523   tree result_decl = DECL_RESULT (cfun->decl);
524   if (result_decl
525       && may_be_aliased (result_decl)
526       && ref_maybe_used_by_stmt_p (call, result_decl, false))
527     return;
528 
529   /* We found the call, check whether it is suitable.  */
530   tail_recursion = false;
531   func = gimple_call_fndecl (call);
532   if (func
533       && !fndecl_built_in_p (func)
534       && recursive_call_p (current_function_decl, func))
535     {
536       tree arg;
537 
538       for (param = DECL_ARGUMENTS (current_function_decl), idx = 0;
539 	   param && idx < gimple_call_num_args (call);
540 	   param = DECL_CHAIN (param), idx ++)
541 	{
542 	  arg = gimple_call_arg (call, idx);
543 	  if (param != arg)
544 	    {
545 	      /* Make sure there are no problems with copying.  The parameter must
546 	         have a copyable type and the two arguments must have reasonably
547 	         equivalent types.  The latter requirement could be relaxed if
548 	         we emitted a suitable type conversion statement.  */
549 	      if (!is_gimple_reg_type (TREE_TYPE (param))
550 		  || !useless_type_conversion_p (TREE_TYPE (param),
551 					         TREE_TYPE (arg)))
552 		break;
553 
554 	      /* The parameter should be a real operand, so that the phi node
555 		 created for it at the start of the function has the meaning
556 		 of copying the value.  This test implies is_gimple_reg_type
557 		 from the previous condition, however this one could be
558 		 relaxed by being more careful with copying the new value
559 		 of the parameter (emitting appropriate GIMPLE_ASSIGN and
560 		 updating the virtual operands).  */
561 	      if (!is_gimple_reg (param))
562 		break;
563 	    }
564 	}
565       if (idx == gimple_call_num_args (call) && !param)
566 	tail_recursion = true;
567     }
568 
569   /* Compute live vars if not computed yet.  */
570   if (live_vars == NULL)
571     {
572       unsigned int cnt = 0;
573       FOR_EACH_LOCAL_DECL (cfun, idx, var)
574 	if (VAR_P (var)
575 	    && auto_var_in_fn_p (var, cfun->decl)
576 	    && may_be_aliased (var))
577 	  {
578 	    if (live_vars == NULL)
579 	      live_vars = new live_vars_map;
580 	    live_vars->put (DECL_UID (var), cnt++);
581 	  }
582       if (live_vars)
583 	live_vars_vec = compute_live_vars (cfun, live_vars);
584     }
585 
586   /* Determine a bitmap of variables which are still in scope after the
587      call.  */
588   bitmap local_live_vars = NULL;
589   if (live_vars)
590     local_live_vars = live_vars_at_stmt (live_vars_vec, live_vars, call);
591 
592   /* Make sure the tail invocation of this function does not indirectly
593      refer to local variables.  (Passing variables directly by value
594      is OK.)  */
595   FOR_EACH_LOCAL_DECL (cfun, idx, var)
596     {
597       if (TREE_CODE (var) != PARM_DECL
598 	  && auto_var_in_fn_p (var, cfun->decl)
599 	  && may_be_aliased (var)
600 	  && (ref_maybe_used_by_stmt_p (call, var, false)
601 	      || call_may_clobber_ref_p (call, var, false)))
602 	{
603 	  if (!VAR_P (var))
604 	    {
605 	      if (local_live_vars)
606 		BITMAP_FREE (local_live_vars);
607 	      return;
608 	    }
609 	  else
610 	    {
611 	      unsigned int *v = live_vars->get (DECL_UID (var));
612 	      if (bitmap_bit_p (local_live_vars, *v))
613 		{
614 		  BITMAP_FREE (local_live_vars);
615 		  return;
616 		}
617 	    }
618 	}
619     }
620 
621   if (local_live_vars)
622     BITMAP_FREE (local_live_vars);
623 
624   /* Now check the statements after the call.  None of them has virtual
625      operands, so they may only depend on the call through its return
626      value.  The return value should also be dependent on each of them,
627      since we are running after dce.  */
628   m = NULL_TREE;
629   a = NULL_TREE;
630   auto_bitmap to_move_defs;
631   auto_vec<gimple *> to_move_stmts;
632 
633   abb = bb;
634   agsi = gsi;
635   while (1)
636     {
637       tree tmp_a = NULL_TREE;
638       tree tmp_m = NULL_TREE;
639       gsi_next (&agsi);
640 
641       while (gsi_end_p (agsi))
642 	{
643 	  ass_var = propagate_through_phis (ass_var, single_succ_edge (abb));
644 	  abb = single_succ (abb);
645 	  agsi = gsi_start_bb (abb);
646 	}
647 
648       stmt = gsi_stmt (agsi);
649       if (gimple_code (stmt) == GIMPLE_RETURN)
650 	break;
651 
652       if (gimple_code (stmt) == GIMPLE_LABEL
653 	  || gimple_code (stmt) == GIMPLE_NOP
654 	  || gimple_code (stmt) == GIMPLE_PREDICT
655 	  || gimple_clobber_p (stmt)
656 	  || is_gimple_debug (stmt))
657 	continue;
658 
659       if (gimple_code (stmt) != GIMPLE_ASSIGN)
660 	return;
661 
662       /* This is a gimple assign. */
663       par ret = process_assignment (as_a <gassign *> (stmt), gsi,
664 				    &tmp_m, &tmp_a, &ass_var, to_move_defs);
665       if (ret == FAIL)
666 	return;
667       else if (ret == TRY_MOVE)
668 	{
669 	  if (! tail_recursion)
670 	    return;
671 	  /* Do not deal with checking dominance, the real fix is to
672 	     do path isolation for the transform phase anyway, removing
673 	     the need to compute the accumulators with new stmts.  */
674 	  if (abb != bb)
675 	    return;
676 	  for (unsigned opno = 1; opno < gimple_num_ops (stmt); ++opno)
677 	    {
678 	      tree op = gimple_op (stmt, opno);
679 	      if (independent_of_stmt_p (op, stmt, gsi, to_move_defs) != op)
680 		return;
681 	    }
682 	  bitmap_set_bit (to_move_defs,
683 			  SSA_NAME_VERSION (gimple_assign_lhs (stmt)));
684 	  to_move_stmts.safe_push (stmt);
685 	  continue;
686 	}
687 
688       if (tmp_a)
689 	{
690 	  tree type = TREE_TYPE (tmp_a);
691 	  if (a)
692 	    a = fold_build2 (PLUS_EXPR, type, fold_convert (type, a), tmp_a);
693 	  else
694 	    a = tmp_a;
695 	}
696       if (tmp_m)
697 	{
698 	  tree type = TREE_TYPE (tmp_m);
699 	  if (m)
700 	    m = fold_build2 (MULT_EXPR, type, fold_convert (type, m), tmp_m);
701 	  else
702 	    m = tmp_m;
703 
704 	  if (a)
705 	    a = fold_build2 (MULT_EXPR, type, fold_convert (type, a), tmp_m);
706 	}
707     }
708 
709   /* See if this is a tail call we can handle.  */
710   ret_var = gimple_return_retval (as_a <greturn *> (stmt));
711 
712   /* We may proceed if either there is no return value, or the return value
713      is identical to the call's return value.  */
714   if (ret_var
715       && (ret_var != ass_var))
716     return;
717 
718   /* If this is not a tail recursive call, we cannot handle addends or
719      multiplicands.  */
720   if (!tail_recursion && (m || a))
721     return;
722 
723   /* For pointers only allow additions.  */
724   if (m && POINTER_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl))))
725     return;
726 
727   /* Move queued defs.  */
728   if (tail_recursion)
729     {
730       unsigned i;
731       FOR_EACH_VEC_ELT (to_move_stmts, i, stmt)
732 	{
733 	  gimple_stmt_iterator mgsi = gsi_for_stmt (stmt);
734 	  gsi_move_before (&mgsi, &gsi);
735 	}
736       if (!tailr_arg_needs_copy)
737 	tailr_arg_needs_copy = BITMAP_ALLOC (NULL);
738       for (param = DECL_ARGUMENTS (current_function_decl), idx = 0;
739 	   param;
740 	   param = DECL_CHAIN (param), idx++)
741 	{
742 	  tree ddef, arg = gimple_call_arg (call, idx);
743 	  if (is_gimple_reg (param)
744 	      && (ddef = ssa_default_def (cfun, param))
745 	      && (arg != ddef))
746 	    bitmap_set_bit (tailr_arg_needs_copy, idx);
747 	}
748     }
749 
750   nw = XNEW (struct tailcall);
751 
752   nw->call_gsi = gsi;
753 
754   nw->tail_recursion = tail_recursion;
755 
756   nw->mult = m;
757   nw->add = a;
758 
759   nw->next = *ret;
760   *ret = nw;
761 }
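
/* For the sum () example in the overview comment at the top of this file
   (illustrative description, not from the original sources), the call
   "sum (n - 1)" would be recorded here as a struct tailcall with
   tail_recursion = true, add = n and mult = NULL_TREE, since the value of
   the call is only used in the addition "n + sum (n - 1)".  */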
762 
763 /* Helper to add PHI_ARG to the phi of VAR in the destination of edge E.  */
764 
765 static void
766 add_successor_phi_arg (edge e, tree var, tree phi_arg)
767 {
768   gphi_iterator gsi;
769 
770   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
771     if (PHI_RESULT (gsi.phi ()) == var)
772       break;
773 
774   gcc_assert (!gsi_end_p (gsi));
775   add_phi_arg (gsi.phi (), phi_arg, e, UNKNOWN_LOCATION);
776 }
777 
778 /* Creates a GIMPLE statement which computes the operation specified by
779    CODE on ACC and OP1, assigns the result to a new variable named LABEL,
780    and inserts the statement at the position specified by GSI.  Returns the
781    tree node of the statement's result.  */
782 
783 static tree
784 adjust_return_value_with_ops (enum tree_code code, const char *label,
785 			      tree acc, tree op1, gimple_stmt_iterator gsi)
786 {
787 
788   tree ret_type = TREE_TYPE (DECL_RESULT (current_function_decl));
789   tree result = make_temp_ssa_name (ret_type, NULL, label);
790   gassign *stmt;
791 
792   if (POINTER_TYPE_P (ret_type))
793     {
794       gcc_assert (code == PLUS_EXPR && TREE_TYPE (acc) == sizetype);
795       code = POINTER_PLUS_EXPR;
796     }
797   if (types_compatible_p (TREE_TYPE (acc), TREE_TYPE (op1))
798       && code != POINTER_PLUS_EXPR)
799     stmt = gimple_build_assign (result, code, acc, op1);
800   else
801     {
802       tree tem;
803       if (code == POINTER_PLUS_EXPR)
804 	tem = fold_build2 (code, TREE_TYPE (op1), op1, acc);
805       else
806 	tem = fold_build2 (code, TREE_TYPE (op1),
807 			   fold_convert (TREE_TYPE (op1), acc), op1);
808       tree rhs = fold_convert (ret_type, tem);
809       rhs = force_gimple_operand_gsi (&gsi, rhs,
810 				      false, NULL, true, GSI_SAME_STMT);
811       stmt = gimple_build_assign (result, rhs);
812     }
813 
814   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
815   return result;
816 }
817 
818 /* Creates a new GIMPLE statement that adjusts the value of accumulator ACC by
819    the computation specified by CODE and OP1 and inserts the statement
820    at the position specified by GSI as a new statement.  Returns the new SSA
821    name of the updated accumulator.  */
822 
823 static tree
824 update_accumulator_with_ops (enum tree_code code, tree acc, tree op1,
825 			     gimple_stmt_iterator gsi)
826 {
827   gassign *stmt;
828   tree var = copy_ssa_name (acc);
829   if (types_compatible_p (TREE_TYPE (acc), TREE_TYPE (op1)))
830     stmt = gimple_build_assign (var, code, acc, op1);
831   else
832     {
833       tree rhs = fold_convert (TREE_TYPE (acc),
834 			       fold_build2 (code,
835 					    TREE_TYPE (op1),
836 					    fold_convert (TREE_TYPE (op1), acc),
837 					    op1));
838       rhs = force_gimple_operand_gsi (&gsi, rhs,
839 				      false, NULL, false, GSI_CONTINUE_LINKING);
840       stmt = gimple_build_assign (var, rhs);
841     }
842   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
843   return var;
844 }
845 
846 /* Adjust the accumulator values according to A and M after GSI, and update
847    the phi nodes on edge BACK.  */
848 
849 static void
850 adjust_accumulator_values (gimple_stmt_iterator gsi, tree m, tree a, edge back)
851 {
852   tree var, a_acc_arg, m_acc_arg;
853 
854   if (m)
855     m = force_gimple_operand_gsi (&gsi, m, true, NULL, true, GSI_SAME_STMT);
856   if (a)
857     a = force_gimple_operand_gsi (&gsi, a, true, NULL, true, GSI_SAME_STMT);
858 
859   a_acc_arg = a_acc;
860   m_acc_arg = m_acc;
861   if (a)
862     {
863       if (m_acc)
864 	{
865 	  if (integer_onep (a))
866 	    var = m_acc;
867 	  else
868 	    var = adjust_return_value_with_ops (MULT_EXPR, "acc_tmp", m_acc,
869 						a, gsi);
870 	}
871       else
872 	var = a;
873 
874       a_acc_arg = update_accumulator_with_ops (PLUS_EXPR, a_acc, var, gsi);
875     }
876 
877   if (m)
878     m_acc_arg = update_accumulator_with_ops (MULT_EXPR, m_acc, m, gsi);
879 
880   if (a_acc)
881     add_successor_phi_arg (back, a_acc, a_acc_arg);
882 
883   if (m_acc)
884     add_successor_phi_arg (back, m_acc, m_acc_arg);
885 }
886 
887 /* Adjust value of the return at the end of BB according to M and A
888    accumulators.  */
889 
890 static void
891 adjust_return_value (basic_block bb, tree m, tree a)
892 {
893   tree retval;
894   greturn *ret_stmt = as_a <greturn *> (gimple_seq_last_stmt (bb_seq (bb)));
895   gimple_stmt_iterator gsi = gsi_last_bb (bb);
896 
897   gcc_assert (gimple_code (ret_stmt) == GIMPLE_RETURN);
898 
899   retval = gimple_return_retval (ret_stmt);
900   if (!retval || retval == error_mark_node)
901     return;
902 
903   if (m)
904     retval = adjust_return_value_with_ops (MULT_EXPR, "mul_tmp", m_acc, retval,
905 					   gsi);
906   if (a)
907     retval = adjust_return_value_with_ops (PLUS_EXPR, "acc_tmp", a_acc, retval,
908 					   gsi);
909   gimple_return_set_retval (ret_stmt, retval);
910   update_stmt (ret_stmt);
911 }
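
/* For illustration (a hypothetical example, not from the original sources):
   with both accumulators live, a trailing "return x;" is rewritten by
   adjust_return_value into roughly

     mul_tmp_1 = m_acc * x;
     acc_tmp_2 = a_acc + mul_tmp_1;
     return acc_tmp_2;

   which is the "a_acc + x * m_acc" form described in case 1 of the overview
   comment at the top of this file.  */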
912 
913 /* Subtract COUNT from the basic block BB and its
914    outgoing edge.  */
915 static void
916 decrease_profile (basic_block bb, profile_count count)
917 {
918   bb->count = bb->count - count;
919   if (!single_succ_p (bb))
920     {
921       gcc_assert (!EDGE_COUNT (bb->succs));
922       return;
923     }
924 }
925 
926 /* Eliminates the tail call described by T, turning it into a jump back
927    to the start of the function.  */
928 
929 static void
930 eliminate_tail_call (struct tailcall *t)
931 {
932   tree param, rslt;
933   gimple *stmt, *call;
934   tree arg;
935   size_t idx;
936   basic_block bb, first;
937   edge e;
938   gphi *phi;
939   gphi_iterator gpi;
940   gimple_stmt_iterator gsi;
941   gimple *orig_stmt;
942 
943   stmt = orig_stmt = gsi_stmt (t->call_gsi);
944   bb = gsi_bb (t->call_gsi);
945 
946   if (dump_file && (dump_flags & TDF_DETAILS))
947     {
948       fprintf (dump_file, "Eliminated tail recursion in bb %d : ",
949 	       bb->index);
950       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
951       fprintf (dump_file, "\n");
952     }
953 
954   gcc_assert (is_gimple_call (stmt));
955 
956   first = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
957 
958   /* Remove the code after call_gsi that will become unreachable.  The
959      possibly unreachable code in other blocks is removed later in
960      cfg cleanup.  */
961   gsi = t->call_gsi;
962   gimple_stmt_iterator gsi2 = gsi_last_bb (gimple_bb (gsi_stmt (gsi)));
963   while (gsi_stmt (gsi2) != gsi_stmt (gsi))
964     {
965       gimple *t = gsi_stmt (gsi2);
966       /* Do not remove the return statement, so that redirect_edge_and_branch
967 	 sees how the block ends.  */
968       if (gimple_code (t) != GIMPLE_RETURN)
969 	{
970 	  gimple_stmt_iterator gsi3 = gsi2;
971 	  gsi_prev (&gsi2);
972 	  gsi_remove (&gsi3, true);
973 	  release_defs (t);
974 	}
975       else
976 	gsi_prev (&gsi2);
977     }
978 
979   /* The number of executions of the function has been reduced by the tailcall.  */
980   e = single_succ_edge (gsi_bb (t->call_gsi));
981 
982   profile_count count = e->count ();
983 
984   /* When the profile is inconsistent and the recursion edge is more frequent
985      than the number of executions of the function, scale it down, so we do not
986      end up with 0 executions of the entry block.  */
987   if (count >= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
988     count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.apply_scale (7, 8);
989   decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), count);
990   decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), count);
991   if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
992     decrease_profile (e->dest, count);
993 
994   /* Replace the call by a jump to the start of function.  */
995   e = redirect_edge_and_branch (single_succ_edge (gsi_bb (t->call_gsi)),
996 				first);
997   gcc_assert (e);
998   PENDING_STMT (e) = NULL;
999 
1000   /* Add phi node entries for arguments.  The ordering of the phi nodes should
1001      be the same as the ordering of the arguments.  */
1002   for (param = DECL_ARGUMENTS (current_function_decl),
1003 	 idx = 0, gpi = gsi_start_phis (first);
1004        param;
1005        param = DECL_CHAIN (param), idx++)
1006     {
1007       if (!bitmap_bit_p (tailr_arg_needs_copy, idx))
1008 	continue;
1009 
1010       arg = gimple_call_arg (stmt, idx);
1011       phi = gpi.phi ();
1012       gcc_assert (param == SSA_NAME_VAR (PHI_RESULT (phi)));
1013 
1014       add_phi_arg (phi, arg, e, gimple_location (stmt));
1015       gsi_next (&gpi);
1016     }
1017 
1018   /* Update the values of accumulators.  */
1019   adjust_accumulator_values (t->call_gsi, t->mult, t->add, e);
1020 
1021   call = gsi_stmt (t->call_gsi);
1022   rslt = gimple_call_lhs (call);
1023   if (rslt != NULL_TREE && TREE_CODE (rslt) == SSA_NAME)
1024     {
1025       /* Result of the call will no longer be defined.  So adjust the
1026 	 SSA_NAME_DEF_STMT accordingly.  */
1027       SSA_NAME_DEF_STMT (rslt) = gimple_build_nop ();
1028     }
1029 
1030   gsi_remove (&t->call_gsi, true);
1031   release_defs (call);
1032 }
1033 
1034 /* Optimizes the tailcall described by T.  If OPT_TAILCALLS is true, also
1035    marks the tail call for the sibcall optimization.  */
1036 
1037 static bool
1038 optimize_tail_call (struct tailcall *t, bool opt_tailcalls)
1039 {
1040   if (t->tail_recursion)
1041     {
1042       eliminate_tail_call (t);
1043       return true;
1044     }
1045 
1046   if (opt_tailcalls)
1047     {
1048       gcall *stmt = as_a <gcall *> (gsi_stmt (t->call_gsi));
1049 
1050       gimple_call_set_tail (stmt, true);
1051       cfun->tail_call_marked = true;
1052       if (dump_file && (dump_flags & TDF_DETAILS))
1053         {
1054 	  fprintf (dump_file, "Found tail call ");
1055 	  print_gimple_stmt (dump_file, stmt, 0, dump_flags);
1056 	  fprintf (dump_file, " in bb %i\n", (gsi_bb (t->call_gsi))->index);
1057 	}
1058     }
1059 
1060   return false;
1061 }
1062 
1063 /* Creates a tail-call accumulator of the same type as the return type of the
1064    current function.  LABEL is the name used to create the temporary
1065    variable for the accumulator.  The accumulator is created as a PHI node in
1066    basic block BB, which must have a single predecessor; its initial value is
1067    INIT converted to the current function's return type.  */
1068 
1069 static tree
1070 create_tailcall_accumulator (const char *label, basic_block bb, tree init)
1071 {
1072   tree ret_type = TREE_TYPE (DECL_RESULT (current_function_decl));
1073   if (POINTER_TYPE_P (ret_type))
1074     ret_type = sizetype;
1075 
1076   tree tmp = make_temp_ssa_name (ret_type, NULL, label);
1077   gphi *phi;
1078 
1079   phi = create_phi_node (tmp, bb);
1080   /* RET_TYPE can be a float when -ffast-math is enabled.  */
1081   add_phi_arg (phi, fold_convert (ret_type, init), single_pred_edge (bb),
1082 	       UNKNOWN_LOCATION);
1083   return PHI_RESULT (phi);
1084 }
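
/* For illustration (a hypothetical example, not from the original sources):
   for the additive accumulator this creates, in the header block of the new
   loop, a PHI of roughly the form

     add_acc_1 = PHI <0 (entry edge)>

   with only the entry-edge argument filled in here; the back-edge argument
   is added later by adjust_accumulator_values via add_successor_phi_arg.  */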
1085 
1086 /* Optimizes tail calls in the function, turning the tail recursion
1087    into iteration.  */
1088 
1089 static unsigned int
1090 tree_optimize_tail_calls_1 (bool opt_tailcalls)
1091 {
1092   edge e;
1093   bool phis_constructed = false;
1094   struct tailcall *tailcalls = NULL, *act, *next;
1095   bool changed = false;
1096   basic_block first = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1097   tree param;
1098   gimple *stmt;
1099   edge_iterator ei;
1100 
1101   if (!suitable_for_tail_opt_p ())
1102     return 0;
1103   if (opt_tailcalls)
1104     opt_tailcalls = suitable_for_tail_call_opt_p ();
1105 
1106   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1107     {
1108       /* Only traverse the normal exits, i.e. those that end with a return
1109 	 statement.  */
1110       stmt = last_stmt (e->src);
1111 
1112       if (stmt
1113 	  && gimple_code (stmt) == GIMPLE_RETURN)
1114 	find_tail_calls (e->src, &tailcalls);
1115     }
1116 
1117   if (live_vars)
1118     {
1119       destroy_live_vars (live_vars_vec);
1120       delete live_vars;
1121       live_vars = NULL;
1122     }
1123 
1124   /* Construct the phi nodes and accumulators if necessary.  */
1125   a_acc = m_acc = NULL_TREE;
1126   for (act = tailcalls; act; act = act->next)
1127     {
1128       if (!act->tail_recursion)
1129 	continue;
1130 
1131       if (!phis_constructed)
1132 	{
1133 	  /* Split the entry edge if the block has more than one predecessor
1134 	     or if there are existing (degenerate) PHI nodes in it.  */
1135 	  if (!single_pred_p (first)
1136 	      || !gimple_seq_empty_p (phi_nodes (first)))
1137 	    first =
1138 	      split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1139 
1140 	  /* Copy the args if needed.  */
1141 	  unsigned idx;
1142 	  for (param = DECL_ARGUMENTS (current_function_decl), idx = 0;
1143 	       param;
1144 	       param = DECL_CHAIN (param), idx++)
1145 	    if (bitmap_bit_p (tailr_arg_needs_copy, idx))
1146 	      {
1147 		tree name = ssa_default_def (cfun, param);
1148 		tree new_name = make_ssa_name (param, SSA_NAME_DEF_STMT (name));
1149 		gphi *phi;
1150 
1151 		set_ssa_default_def (cfun, param, new_name);
1152 		phi = create_phi_node (name, first);
1153 		add_phi_arg (phi, new_name, single_pred_edge (first),
1154 			     EXPR_LOCATION (param));
1155 	      }
1156 	  phis_constructed = true;
1157 	}
1158 
1159       if (act->add && !a_acc)
1160 	a_acc = create_tailcall_accumulator ("add_acc", first,
1161 					     integer_zero_node);
1162 
1163       if (act->mult && !m_acc)
1164 	m_acc = create_tailcall_accumulator ("mult_acc", first,
1165 					     integer_one_node);
1166     }
1167 
1168   if (a_acc || m_acc)
1169     {
1170       /* When the tail call elimination using accumulators is performed,
1171 	 statements adding the accumulated value are inserted at all exits.
1172 	 This turns all other tail calls into non-tail ones.  */
1173       opt_tailcalls = false;
1174     }
1175 
1176   for (; tailcalls; tailcalls = next)
1177     {
1178       next = tailcalls->next;
1179       changed |= optimize_tail_call (tailcalls, opt_tailcalls);
1180       free (tailcalls);
1181     }
1182 
1183   if (a_acc || m_acc)
1184     {
1185       /* Modify the remaining return statements.  */
1186       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1187 	{
1188 	  stmt = last_stmt (e->src);
1189 
1190 	  if (stmt
1191 	      && gimple_code (stmt) == GIMPLE_RETURN)
1192 	    adjust_return_value (e->src, m_acc, a_acc);
1193 	}
1194     }
1195 
1196   if (changed)
1197     {
1198       /* We may have created new loops.  Make them magically appear.  */
1199       loops_state_set (LOOPS_NEED_FIXUP);
1200       free_dominance_info (CDI_DOMINATORS);
1201     }
1202 
1203   /* Add phi nodes for the virtual operands defined in the function to the
1204      header of the loop created by tail recursion elimination.  Do so
1205      by triggering the SSA renamer.  */
1206   if (phis_constructed)
1207     mark_virtual_operands_for_renaming (cfun);
1208 
1209   if (tailr_arg_needs_copy)
1210     BITMAP_FREE (tailr_arg_needs_copy);
1211 
1212   if (changed)
1213     return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
1214   return 0;
1215 }
1216 
1217 static bool
1218 gate_tail_calls (void)
1219 {
1220   return flag_optimize_sibling_calls != 0 && dbg_cnt (tail_call);
1221 }
1222 
1223 static unsigned int
1224 execute_tail_calls (void)
1225 {
1226   return tree_optimize_tail_calls_1 (true);
1227 }
1228 
1229 namespace {
1230 
1231 const pass_data pass_data_tail_recursion =
1232 {
1233   GIMPLE_PASS, /* type */
1234   "tailr", /* name */
1235   OPTGROUP_NONE, /* optinfo_flags */
1236   TV_NONE, /* tv_id */
1237   ( PROP_cfg | PROP_ssa ), /* properties_required */
1238   0, /* properties_provided */
1239   0, /* properties_destroyed */
1240   0, /* todo_flags_start */
1241   0, /* todo_flags_finish */
1242 };
1243 
1244 class pass_tail_recursion : public gimple_opt_pass
1245 {
1246 public:
1247   pass_tail_recursion (gcc::context *ctxt)
1248     : gimple_opt_pass (pass_data_tail_recursion, ctxt)
1249   {}
1250 
1251   /* opt_pass methods: */
1252   opt_pass * clone () { return new pass_tail_recursion (m_ctxt); }
1253   virtual bool gate (function *) { return gate_tail_calls (); }
1254   virtual unsigned int execute (function *)
1255     {
1256       return tree_optimize_tail_calls_1 (false);
1257     }
1258 
1259 }; // class pass_tail_recursion
1260 
1261 } // anon namespace
1262 
1263 gimple_opt_pass *
1264 make_pass_tail_recursion (gcc::context *ctxt)
1265 {
1266   return new pass_tail_recursion (ctxt);
1267 }
1268 
1269 namespace {
1270 
1271 const pass_data pass_data_tail_calls =
1272 {
1273   GIMPLE_PASS, /* type */
1274   "tailc", /* name */
1275   OPTGROUP_NONE, /* optinfo_flags */
1276   TV_NONE, /* tv_id */
1277   ( PROP_cfg | PROP_ssa ), /* properties_required */
1278   0, /* properties_provided */
1279   0, /* properties_destroyed */
1280   0, /* todo_flags_start */
1281   0, /* todo_flags_finish */
1282 };
1283 
1284 class pass_tail_calls : public gimple_opt_pass
1285 {
1286 public:
1287   pass_tail_calls (gcc::context *ctxt)
1288     : gimple_opt_pass (pass_data_tail_calls, ctxt)
1289   {}
1290 
1291   /* opt_pass methods: */
1292   virtual bool gate (function *) { return gate_tail_calls (); }
1293   virtual unsigned int execute (function *) { return execute_tail_calls (); }
1294 
1295 }; // class pass_tail_calls
1296 
1297 } // anon namespace
1298 
1299 gimple_opt_pass *
1300 make_pass_tail_calls (gcc::context *ctxt)
1301 {
1302   return new pass_tail_calls (ctxt);
1303 }
1304