/* Utilities for ipa analysis.
   Copyright (C) 2005-2016 Free Software Foundation, Inc.
   Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "dumpfile.h"
#include "splay-tree.h"
#include "ipa-utils.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-inline.h"

/* Debugging function for postorder and inorder code.  NOTE is a string
   that is printed before the nodes are printed.  ORDER is an array of
   cgraph_nodes that has COUNT useful nodes in it.  */

void
ipa_print_order (FILE* out,
		 const char * note,
		 struct cgraph_node** order,
		 int count)
{
  int i;
  fprintf (out, "\n\n ordered call graph: %s\n", note);

  for (i = count - 1; i >= 0; i--)
    order[i]->dump (out);
  fprintf (out, "\n");
  fflush (out);
}


struct searchc_env {
  struct cgraph_node **stack;
  int stack_size;
  struct cgraph_node **result;
  int order_pos;
  splay_tree nodes_marked_new;
  bool reduce;
  bool allow_overwritable;
  int count;
};

/* This is an implementation of Tarjan's strongly connected region
   finder as reprinted in Aho, Hopcroft and Ullman's The Design and
   Analysis of Computer Algorithms (1974), pages 192-193.  This version
   has been customized for cgraph_nodes.  This function should only be
   called from itself or ipa_reduced_postorder.  ENV holds the traversal
   state that would be unnecessary if C had nested functions.  V is the
   node to start searching from.  */

static void
searchc (struct searchc_env* env, struct cgraph_node *v,
	 bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_edge *edge;
  struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;

  /* Mark the node as old.  */
  v_info->new_node = false;
  splay_tree_remove (env->nodes_marked_new, v->uid);

  v_info->dfn_number = env->count;
  v_info->low_link = env->count;
  env->count++;
  env->stack[(env->stack_size)++] = v;
  v_info->on_stack = true;

  for (edge = v->callees; edge; edge = edge->next_callee)
    {
      struct ipa_dfs_info * w_info;
      enum availability avail;
      struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);

      if (!w || (ignore_edge && ignore_edge (edge)))
        continue;

      if (w->aux
	  && (avail > AVAIL_INTERPOSABLE
	      || (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
	{
	  w_info = (struct ipa_dfs_info *) w->aux;
	  if (w_info->new_node)
	    {
	      searchc (env, w, ignore_edge);
	      v_info->low_link =
		(v_info->low_link < w_info->low_link) ?
		v_info->low_link : w_info->low_link;
	    }
	  else
	    if ((w_info->dfn_number < v_info->dfn_number)
		&& (w_info->on_stack))
	      v_info->low_link =
		(w_info->dfn_number < v_info->low_link) ?
		w_info->dfn_number : v_info->low_link;
	}
    }


  if (v_info->low_link == v_info->dfn_number)
    {
      struct cgraph_node *last = NULL;
      struct cgraph_node *x;
      struct ipa_dfs_info *x_info;
      do {
	x = env->stack[--(env->stack_size)];
	x_info = (struct ipa_dfs_info *) x->aux;
	x_info->on_stack = false;
	x_info->scc_no = v_info->dfn_number;

	if (env->reduce)
	  {
	    x_info->next_cycle = last;
	    last = x;
	  }
	else
	  env->result[env->order_pos++] = x;
      }
      while (v != x);
      if (env->reduce)
	env->result[env->order_pos++] = v;
    }
}
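
/* An illustrative trace (not part of the algorithm itself): on the cycle
   A -> B -> C -> A, searchc assigns dfn_numbers 1, 2 and 3 to A, B and C.
   The back edge C -> A lowers C's low_link to 1, and that value propagates
   to B and A as the recursion unwinds.  Only A ends up with
   low_link == dfn_number, so A, B and C are popped off the stack together
   as a single SCC.  */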

/* Topsort the call graph by caller relation.  Put the result in ORDER.

   The REDUCE flag is true if you want the cycles reduced to single nodes.
   You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
   call graph nodes in a reduced node.

   Set ALLOW_OVERWRITABLE if nodes with such availability should be included.
   IGNORE_EDGE, if non-NULL, is a hook that may make some edges insignificant
   for the topological sort.  */
int
ipa_reduced_postorder (struct cgraph_node **order,
		       bool reduce, bool allow_overwritable,
		       bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_node *node;
  struct searchc_env env;
  splay_tree_node result;
  env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  env.stack_size = 0;
  env.result = order;
  env.order_pos = 0;
  env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
  env.count = 1;
  env.reduce = reduce;
  env.allow_overwritable = allow_overwritable;

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      enum availability avail = node->get_availability ();

      if (avail > AVAIL_INTERPOSABLE
	  || (allow_overwritable
	      && (avail == AVAIL_INTERPOSABLE)))
	{
	  /* Reuse the info if it is already there.  */
	  struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
	  if (!info)
	    info = XCNEW (struct ipa_dfs_info);
	  info->new_node = true;
	  info->on_stack = false;
	  info->next_cycle = NULL;
	  node->aux = info;

	  splay_tree_insert (env.nodes_marked_new,
			     (splay_tree_key)node->uid,
			     (splay_tree_value)node);
	}
      else
	node->aux = NULL;
    }
  result = splay_tree_min (env.nodes_marked_new);
  while (result)
    {
      node = (struct cgraph_node *)result->value;
      searchc (&env, node, ignore_edge);
      result = splay_tree_min (env.nodes_marked_new);
    }
  splay_tree_delete (env.nodes_marked_new);
  free (env.stack);

  return env.order_pos;
}
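
/* A minimal usage sketch, kept under "#if 0" so it is never compiled: it
   shows how a hypothetical pass might combine ipa_reduced_postorder with
   ipa_get_nodes_in_cycle and the cleanup routine below.  The function name
   is made up for illustration.  */

#if 0
static void
example_walk_reduced_sccs (void)
{
  struct cgraph_node **order
    = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  /* Reduce cycles to single representative nodes.  */
  int n = ipa_reduced_postorder (order, true, false, NULL);
  ipa_print_order (stderr, "reduced", order, n);

  for (int i = 0; i < n; i++)
    {
      /* Expand the representative into the real nodes of its SCC.  */
      vec<cgraph_node *> cycle = ipa_get_nodes_in_cycle (order[i]);
      for (unsigned j = 0; j < cycle.length (); j++)
	{
	  /* ... process cycle[j] ... */
	}
      cycle.release ();
    }

  ipa_free_postorder_info ();
  free (order);
}
#endif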

/* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
   graph nodes.  */

void
ipa_free_postorder_info (void)
{
  struct cgraph_node *node;
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      /* Get rid of the aux information.  */
      if (node->aux)
	{
	  free (node->aux);
	  node->aux = NULL;
	}
    }
}

/* Get the set of nodes for the cycle in the reduced call graph starting
   from NODE.  */

vec<cgraph_node *>
ipa_get_nodes_in_cycle (struct cgraph_node *node)
{
  vec<cgraph_node *> v = vNULL;
  struct ipa_dfs_info *node_dfs_info;
  while (node)
    {
      v.safe_push (node);
      node_dfs_info = (struct ipa_dfs_info *) node->aux;
      node = node_dfs_info->next_cycle;
    }
  return v;
}

/* Return true iff CS is an edge within a strongly connected component as
   computed by ipa_reduced_postorder.  */

bool
ipa_edge_within_scc (struct cgraph_edge *cs)
{
  struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
  struct ipa_dfs_info *callee_dfs;
  struct cgraph_node *callee = cs->callee->function_symbol ();

  callee_dfs = (struct ipa_dfs_info *) callee->aux;
  return (caller_dfs
	  && callee_dfs
	  && caller_dfs->scc_no == callee_dfs->scc_no);
}

struct postorder_stack
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  int ref;
};

/* Fill array ORDER with all nodes with output flag set, in reverse
   topological order.  Return the number of elements in the array.
   FIXME: While walking, consider aliases, too.  */

int
ipa_reverse_postorder (struct cgraph_node **order)
{
  struct cgraph_node *node, *node2;
  int stack_size = 0;
  int order_pos = 0;
  struct cgraph_edge *edge;
  int pass;
  struct ipa_ref *ref = NULL;

  struct postorder_stack *stack =
    XCNEWVEC (struct postorder_stack, symtab->cgraph_count);

  /* We have to deal with cycles nicely, so use a depth first traversal
     output algorithm.  Ignore the fact that some functions won't need
     to be output and put them into order as well, so we get dependencies
     right through inline functions.  */
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  for (pass = 0; pass < 2; pass++)
    FOR_EACH_FUNCTION (node)
      if (!node->aux
	  && (pass
	      || (!node->address_taken
		  && !node->global.inlined_to
		  && !node->alias && !node->thunk.thunk_p
		  && !node->only_called_directly_p ())))
	{
	  stack_size = 0;
	  stack[stack_size].node = node;
	  stack[stack_size].edge = node->callers;
	  stack[stack_size].ref = 0;
	  node->aux = (void *)(size_t)1;
	  while (stack_size >= 0)
	    {
	      while (true)
		{
		  node2 = NULL;
		  while (stack[stack_size].edge && !node2)
		    {
		      edge = stack[stack_size].edge;
		      node2 = edge->caller;
		      stack[stack_size].edge = edge->next_caller;
		      /* Break possible cycles involving always-inline
			 functions by ignoring edges from always-inline
			 functions to non-always-inline functions.  */
		      if (DECL_DISREGARD_INLINE_LIMITS (edge->caller->decl)
			  && !DECL_DISREGARD_INLINE_LIMITS
			    (edge->callee->function_symbol ()->decl))
			node2 = NULL;
		    }
		  for (; stack[stack_size].node->iterate_referring (
						       stack[stack_size].ref,
						       ref) && !node2;
		       stack[stack_size].ref++)
		    {
		      if (ref->use == IPA_REF_ALIAS)
			node2 = dyn_cast <cgraph_node *> (ref->referring);
		    }
		  if (!node2)
		    break;
		  if (!node2->aux)
		    {
		      stack[++stack_size].node = node2;
		      stack[stack_size].edge = node2->callers;
		      stack[stack_size].ref = 0;
		      node2->aux = (void *)(size_t)1;
		    }
		}
	      order[order_pos++] = stack[stack_size--].node;
	    }
	}
  free (stack);
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  return order_pos;
}
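
/* A minimal usage sketch, under "#if 0" so it is never compiled: a
   hypothetical caller allocates ORDER to the number of call graph nodes
   and walks the result.  The function name is made up for illustration.  */

#if 0
static void
example_walk_reverse_postorder (void)
{
  struct cgraph_node **order
    = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  int n = ipa_reverse_postorder (order);

  for (int i = 0; i < n; i++)
    {
      /* ... process order[i] ... */
    }

  free (order);
}
#endif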



/* Given a memory reference T, return the variable at the bottom of the
   access.  Unlike get_base_address, this will recurse through
   INDIRECT_REFs.  */

tree
get_base_var (tree t)
{
  while (!SSA_VAR_P (t)
	 && (!CONSTANT_CLASS_P (t))
	 && TREE_CODE (t) != LABEL_DECL
	 && TREE_CODE (t) != FUNCTION_DECL
	 && TREE_CODE (t) != CONST_DECL
	 && TREE_CODE (t) != CONSTRUCTOR)
    {
      t = TREE_OPERAND (t, 0);
    }
  return t;
}
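
/* For instance, for a reference such as a.b[i] (an ARRAY_REF around a
   COMPONENT_REF) the loop above strips the handled components and returns
   the VAR_DECL for "a"; for *p it stops at the SSA name or VAR_DECL "p"
   after stripping the indirection.  */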


/* SRC and DST are going to be merged.  Take SRC's profile and merge it into
   DST so it is not going to be lost.  Possibly destroy SRC's body on the way
   unless PRESERVE_BODY is set.  */

void
ipa_merge_profiles (struct cgraph_node *dst,
		    struct cgraph_node *src,
		    bool preserve_body)
{
  tree oldsrcdecl = src->decl;
  struct function *srccfun, *dstcfun;
  bool match = true;

  if (!src->definition
      || !dst->definition)
    return;
  if (src->frequency < dst->frequency)
    src->frequency = dst->frequency;

  /* Time profiles are merged.  */
  if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
    dst->tp_first_run = src->tp_first_run;

  if (src->profile_id && !dst->profile_id)
    dst->profile_id = src->profile_id;

  if (!dst->count)
    return;
  if (!src->count || src->alias)
    return;
  if (symtab->dump_file)
    {
      fprintf (symtab->dump_file, "Merging profiles of %s/%i to %s/%i\n",
	       xstrdup_for_dump (src->name ()), src->order,
	       xstrdup_for_dump (dst->name ()), dst->order);
    }
  dst->count += src->count;

  /* This is ugly.  We need to get both function bodies into memory.
     If the declaration is merged, we need to duplicate it to be able
     to load the body that is being replaced.  This makes the symbol table
     temporarily inconsistent.  */
  if (src->decl == dst->decl)
    {
      struct lto_in_decl_state temp;
      struct lto_in_decl_state *state;

      /* We are going to move the decl; remove its file decl data
	 and link it with the new decl.  */
      temp.fn_decl = src->decl;
      lto_in_decl_state **slot
	= src->lto_file_data->function_decl_states->find_slot (&temp,
							       NO_INSERT);
      state = *slot;
      src->lto_file_data->function_decl_states->clear_slot (slot);
      gcc_assert (state);

      /* Duplicate the decl and be sure it does not link into body of DST.  */
      src->decl = copy_node (src->decl);
      DECL_STRUCT_FUNCTION (src->decl) = NULL;
      DECL_ARGUMENTS (src->decl) = NULL;
      DECL_INITIAL (src->decl) = NULL;
      DECL_RESULT (src->decl) = NULL;

      /* Associate the decl state with the new declaration, so the LTO
	 streamer can look it up.  */
      state->fn_decl = src->decl;
      slot
	= src->lto_file_data->function_decl_states->find_slot (state, INSERT);
      gcc_assert (!*slot);
      *slot = state;
    }
  src->get_untransformed_body ();
  dst->get_untransformed_body ();
  srccfun = DECL_STRUCT_FUNCTION (src->decl);
  dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
  if (n_basic_blocks_for_fn (srccfun)
      != n_basic_blocks_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; number of basic blocks mismatch.\n");
      match = false;
    }
  else if (last_basic_block_for_fn (srccfun)
	   != last_basic_block_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; last block mismatch.\n");
      match = false;
    }
  else
    {
      basic_block srcbb, dstbb;

      FOR_ALL_BB_FN (srcbb, srccfun)
	{
	  unsigned int i;

	  dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
	  if (dstbb == NULL)
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "No matching block for bb %i.\n",
			 srcbb->index);
	      match = false;
	      break;
	    }
	  if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "Edge count mismatch for bb %i.\n",
			 srcbb->index);
	      match = false;
	      break;
	    }
	  for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
	    {
	      edge srce = EDGE_SUCC (srcbb, i);
	      edge dste = EDGE_SUCC (dstbb, i);
	      if (srce->dest->index != dste->dest->index)
		{
		  if (symtab->dump_file)
		    fprintf (symtab->dump_file,
			     "Succ edge mismatch for bb %i.\n",
			     srce->dest->index);
		  match = false;
		  break;
		}
	    }
	}
    }
  if (match)
    {
      struct cgraph_edge *e, *e2;
      basic_block srcbb, dstbb;

      /* TODO: merge also statement histograms.  */
      FOR_ALL_BB_FN (srcbb, srccfun)
	{
	  unsigned int i;

	  dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
	  dstbb->count += srcbb->count;
	  for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
	    {
	      edge srce = EDGE_SUCC (srcbb, i);
	      edge dste = EDGE_SUCC (dstbb, i);
	      dste->count += srce->count;
	    }
	}
      push_cfun (dstcfun);
      counts_to_freqs ();
      compute_function_frequency ();
      pop_cfun ();
      for (e = dst->callees; e; e = e->next_callee)
	{
	  if (e->speculative)
	    continue;
	  e->count = gimple_bb (e->call_stmt)->count;
	  e->frequency = compute_call_stmt_bb_frequency
			     (dst->decl,
			      gimple_bb (e->call_stmt));
	}
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
	   e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
	{
	  gcov_type count = gimple_bb (e->call_stmt)->count;
	  int freq = compute_call_stmt_bb_frequency
			(dst->decl,
			 gimple_bb (e->call_stmt));
	  /* When the call is speculative, we need to re-distribute the
	     probabilities the same way as they were.  This is not really
	     correct because the speculation may differ in the other copy,
	     but it is probably not worth the effort.  */
	  if (e->speculative)
	    {
	      cgraph_edge *direct, *indirect;
	      cgraph_edge *direct2 = NULL, *indirect2 = NULL;
	      ipa_ref *ref;

	      e->speculative_call_info (direct, indirect, ref);
	      gcc_assert (e == indirect);
	      if (e2 && e2->speculative)
	        e2->speculative_call_info (direct2, indirect2, ref);
	      if (indirect->count || direct->count)
		{
		  /* We should mismatch earlier if there is no matching
		     indirect edge.  */
		  if (!e2)
		    {
		      if (dump_file)
		        fprintf (dump_file,
				 "Mismatch in merging indirect edges\n");
		    }
		  else if (!e2->speculative)
		    indirect->count += e2->count;
		  else if (e2->speculative)
		    {
		      if (DECL_ASSEMBLER_NAME (direct2->callee->decl)
			  != DECL_ASSEMBLER_NAME (direct->callee->decl))
			{
			  if (direct2->count >= direct->count)
			    {
			      direct->redirect_callee (direct2->callee);
			      indirect->count += indirect2->count
						 + direct->count;
			      direct->count = direct2->count;
			    }
			  else
			    indirect->count += indirect2->count + direct2->count;
			}
		      else
			{
			  direct->count += direct2->count;
			  indirect->count += indirect2->count;
			}
		    }
		  int prob = RDIV (direct->count * REG_BR_PROB_BASE,
				   direct->count + indirect->count);
		  direct->frequency = RDIV (freq * prob, REG_BR_PROB_BASE);
		  indirect->frequency = RDIV (freq * (REG_BR_PROB_BASE - prob),
					      REG_BR_PROB_BASE);
		}
	      else
		/* At the moment we should have only profile feedback based
		   speculations when merging.  */
		gcc_unreachable ();
	    }
	  else if (e2 && e2->speculative)
	    {
	      cgraph_edge *direct, *indirect;
	      ipa_ref *ref;

	      e2->speculative_call_info (direct, indirect, ref);
	      e->count = count;
	      e->frequency = freq;
	      int prob = RDIV (direct->count * REG_BR_PROB_BASE, e->count);
	      e->make_speculative (direct->callee, direct->count,
				   RDIV (freq * prob, REG_BR_PROB_BASE));
	    }
	  else
	    {
	      e->count = count;
	      e->frequency = freq;
	    }
	}
      if (!preserve_body)
        src->release_body ();
      inline_update_overall_summary (dst);
    }
  /* TODO: if there is no match, we can scale up.  */
  src->decl = oldsrcdecl;
}
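
/* Typical use (an illustrative sketch, not a requirement of this API):
   a pass that is about to replace SRC with an alias of DST, such as
   identical code folding, would merge the profiles first:

     ipa_merge_profiles (dst, src, false);

   Passing false for PRESERVE_BODY lets SRC's body be released once its
   counts have been folded into DST.  */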

/* Return true if the call to DEST is known to be a self-recursive call
   within FUNC.  */

bool
recursive_call_p (tree func, tree dest)
{
  struct cgraph_node *dest_node = cgraph_node::get_create (dest);
  struct cgraph_node *cnode = cgraph_node::get_create (func);

  return dest_node->semantically_equivalent_p (cnode);
}