1 /* cg_print.c -  Print routines for displaying call graphs.
2 
3    Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
4 
5    This file is part of GNU Binutils.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20    02110-1301, USA.  */
21 
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "cg_arcs.h"
28 #include "cg_print.h"
29 #include "hist.h"
30 #include "utils.h"
31 #include "corefile.h"
32 
33 /* Return value of comparison functions used to sort tables.  */
34 #define	LESSTHAN	-1
35 #define	EQUALTO		0
36 #define	GREATERTHAN	1
37 
38 static void print_header (void);
39 static void print_cycle (Sym *);
40 static int cmp_member (Sym *, Sym *);
41 static void sort_members (Sym *);
42 static void print_members (Sym *);
43 static int cmp_arc (Arc *, Arc *);
44 static void sort_parents (Sym *);
45 static void print_parents (Sym *);
46 static void sort_children (Sym *);
47 static void print_children (Sym *);
48 static void print_line (Sym *);
49 static int cmp_name (const PTR, const PTR);
50 static int cmp_arc_count (const PTR, const PTR);
51 static int cmp_fun_nuses (const PTR, const PTR);
52 static void order_and_dump_functions_by_arcs
53   (Arc **, unsigned long, int, Arc **, unsigned long *);
54 
55 /* Declarations of automatically generated functions to output blurbs.  */
56 extern void bsd_callg_blurb (FILE * fp);
57 extern void fsf_callg_blurb (FILE * fp);
58 
59 double print_time = 0.0;
60 
61 
62 static void
print_header()63 print_header ()
64 {
65   if (first_output)
66     first_output = FALSE;
67   else
68     printf ("\f\n");
69 
70   if (!bsd_style_output)
71     {
72       if (print_descriptions)
73 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
74       else
75 	printf (_("\t\t\tCall graph\n\n"));
76     }
77 
78   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
79 	  (long) hist_scale * sizeof (UNIT));
80 
81   if (print_time > 0.0)
82     printf (_(" for %.2f%% of %.2f seconds\n\n"),
83 	    100.0 / print_time, print_time / hz);
84   else
85     {
86       printf (_(" no time propagated\n\n"));
87 
88       /* This doesn't hurt, since all the numerators will be 0.0.  */
89       print_time = 1.0;
90     }
91 
92   if (bsd_style_output)
93     {
94       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
95 	      "", "", "", "", _("called"), _("total"), _("parents"));
96       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
97 	      _("index"), _("%time"), _("self"), _("descendants"),
98 	      _("called"), _("self"), _("name"), _("index"));
99       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
100 	      "", "", "", "", _("called"), _("total"), _("children"));
101       printf ("\n");
102     }
103   else
104     {
105       printf (_("index %% time    self  children    called     name\n"));
106     }
107 }
108 
109 /* Print a cycle header.  */
110 
111 static void
print_cycle(Sym * cyc)112 print_cycle (Sym *cyc)
113 {
114   char buf[BUFSIZ];
115 
116   sprintf (buf, "[%d]", cyc->cg.index);
117   printf (bsd_style_output
118 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
119 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
120 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
121 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
122 
123   if (cyc->cg.self_calls != 0)
124     printf ("+%-7lu", cyc->cg.self_calls);
125   else
126     printf (" %7.7s", "");
127 
128   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
129 }
130 
131 /* Compare LEFT and RIGHT membmer.  Major comparison key is
132    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
133 
134 static int
cmp_member(Sym * left,Sym * right)135 cmp_member (Sym *left, Sym *right)
136 {
137   double left_time = left->cg.prop.self + left->cg.prop.child;
138   double right_time = right->cg.prop.self + right->cg.prop.child;
139   unsigned long left_calls = left->ncalls + left->cg.self_calls;
140   unsigned long right_calls = right->ncalls + right->cg.self_calls;
141 
142   if (left_time > right_time)
143     return GREATERTHAN;
144 
145   if (left_time < right_time)
146     return LESSTHAN;
147 
148   if (left_calls > right_calls)
149     return GREATERTHAN;
150 
151   if (left_calls < right_calls)
152     return LESSTHAN;
153 
154   return EQUALTO;
155 }
156 
157 /* Sort members of a cycle.  */
158 
159 static void
sort_members(Sym * cyc)160 sort_members (Sym *cyc)
161 {
162   Sym *todo, *doing, *prev;
163 
164   /* Detach cycle members from cyclehead,
165      and insertion sort them back on.  */
166   todo = cyc->cg.cyc.next;
167   cyc->cg.cyc.next = 0;
168 
169   for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
170     {
171       todo = doing->cg.cyc.next;
172 
173       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
174 	{
175 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
176 	    break;
177 	}
178 
179       doing->cg.cyc.next = prev->cg.cyc.next;
180       prev->cg.cyc.next = doing;
181     }
182 }
183 
184 /* Print the members of a cycle.  */
185 
186 static void
print_members(Sym * cyc)187 print_members (Sym *cyc)
188 {
189   Sym *member;
190 
191   sort_members (cyc);
192 
193   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
194     {
195       printf (bsd_style_output
196 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
197 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
198 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
199 	      member->ncalls);
200 
201       if (member->cg.self_calls != 0)
202 	printf ("+%-7lu", member->cg.self_calls);
203       else
204 	printf (" %7.7s", "");
205 
206       printf ("     ");
207       print_name (member);
208       printf ("\n");
209     }
210 }
211 
212 /* Compare two arcs to/from the same child/parent.
213 	- if one arc is a self arc, it's least.
214 	- if one arc is within a cycle, it's less than.
215 	- if both arcs are within a cycle, compare arc counts.
216 	- if neither arc is within a cycle, compare with
217 		time + child_time as major key
218 		arc count as minor key.  */
219 
220 static int
cmp_arc(Arc * left,Arc * right)221 cmp_arc (Arc *left, Arc *right)
222 {
223   Sym *left_parent = left->parent;
224   Sym *left_child = left->child;
225   Sym *right_parent = right->parent;
226   Sym *right_child = right->child;
227   double left_time, right_time;
228 
229   DBG (TIMEDEBUG,
230        printf ("[cmp_arc] ");
231        print_name (left_parent);
232        printf (" calls ");
233        print_name (left_child);
234        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
235 	       left->count, left_child->ncalls);
236        printf ("[cmp_arc] ");
237        print_name (right_parent);
238        printf (" calls ");
239        print_name (right_child);
240        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
241 	       right->count, right_child->ncalls);
242        printf ("\n");
243     );
244 
245   if (left_parent == left_child)
246     return LESSTHAN;		/* Left is a self call.  */
247 
248   if (right_parent == right_child)
249     return GREATERTHAN;		/* Right is a self call.  */
250 
251   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
252       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
253     {
254       /* Left is a call within a cycle.  */
255       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
256 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
257 	{
258 	  /* Right is a call within the cycle, too.  */
259 	  if (left->count < right->count)
260 	    return LESSTHAN;
261 
262 	  if (left->count > right->count)
263 	    return GREATERTHAN;
264 
265 	  return EQUALTO;
266 	}
267       else
268 	{
269 	  /* Right isn't a call within the cycle.  */
270 	  return LESSTHAN;
271 	}
272     }
273   else
274     {
275       /* Left isn't a call within a cycle.  */
276       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
277 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
278 	{
279 	  /* Right is a call within a cycle.  */
280 	  return GREATERTHAN;
281 	}
282       else
283 	{
284 	  /* Neither is a call within a cycle.  */
285 	  left_time = left->time + left->child_time;
286 	  right_time = right->time + right->child_time;
287 
288 	  if (left_time < right_time)
289 	    return LESSTHAN;
290 
291 	  if (left_time > right_time)
292 	    return GREATERTHAN;
293 
294 	  if (left->count < right->count)
295 	    return LESSTHAN;
296 
297 	  if (left->count > right->count)
298 	    return GREATERTHAN;
299 
300 	  return EQUALTO;
301 	}
302     }
303 }
304 
305 
306 static void
sort_parents(Sym * child)307 sort_parents (Sym * child)
308 {
309   Arc *arc, *detached, sorted, *prev;
310 
311   /* Unlink parents from child, then insertion sort back on to
312      sorted's parents.
313 	  *arc        the arc you have detached and are inserting.
314 	  *detached   the rest of the arcs to be sorted.
315 	  sorted      arc list onto which you insertion sort.
316 	  *prev       arc before the arc you are comparing.  */
317   sorted.next_parent = 0;
318 
319   for (arc = child->cg.parents; arc; arc = detached)
320     {
321       detached = arc->next_parent;
322 
323       /* Consider *arc as disconnected; insert it into sorted.  */
324       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
325 	{
326 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
327 	    break;
328 	}
329 
330       arc->next_parent = prev->next_parent;
331       prev->next_parent = arc;
332     }
333 
334   /* Reattach sorted arcs to child.  */
335   child->cg.parents = sorted.next_parent;
336 }
337 
338 
339 static void
print_parents(Sym * child)340 print_parents (Sym *child)
341 {
342   Sym *parent;
343   Arc *arc;
344   Sym *cycle_head;
345 
346   if (child->cg.cyc.head != 0)
347     cycle_head = child->cg.cyc.head;
348   else
349     cycle_head = child;
350 
351   if (!child->cg.parents)
352     {
353       printf (bsd_style_output
354 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
355 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
356 	      "", "", "", "", "", "");
357       return;
358     }
359 
360   sort_parents (child);
361 
362   for (arc = child->cg.parents; arc; arc = arc->next_parent)
363     {
364       parent = arc->parent;
365       if (child == parent || (child->cg.cyc.num != 0
366 			      && parent->cg.cyc.num == child->cg.cyc.num))
367 	{
368 	  /* Selfcall or call among siblings.  */
369 	  printf (bsd_style_output
370 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
371 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
372 		  "", "", "", "",
373 		  arc->count, "");
374 	  print_name (parent);
375 	  printf ("\n");
376 	}
377       else
378 	{
379 	  /* Regular parent of child.  */
380 	  printf (bsd_style_output
381 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
382 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
383 		  "", "",
384 		  arc->time / hz, arc->child_time / hz,
385 		  arc->count, cycle_head->ncalls);
386 	  print_name (parent);
387 	  printf ("\n");
388 	}
389     }
390 }
391 
392 
393 static void
sort_children(Sym * parent)394 sort_children (Sym *parent)
395 {
396   Arc *arc, *detached, sorted, *prev;
397 
398   /* Unlink children from parent, then insertion sort back on to
399      sorted's children.
400 	  *arc        the arc you have detached and are inserting.
401 	  *detached   the rest of the arcs to be sorted.
402 	  sorted      arc list onto which you insertion sort.
403 	  *prev       arc before the arc you are comparing.  */
404   sorted.next_child = 0;
405 
406   for (arc = parent->cg.children; arc; arc = detached)
407     {
408       detached = arc->next_child;
409 
410       /* Consider *arc as disconnected; insert it into sorted.  */
411       for (prev = &sorted; prev->next_child; prev = prev->next_child)
412 	{
413 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
414 	    break;
415 	}
416 
417       arc->next_child = prev->next_child;
418       prev->next_child = arc;
419     }
420 
421   /* Reattach sorted children to parent.  */
422   parent->cg.children = sorted.next_child;
423 }
424 
425 
426 static void
print_children(Sym * parent)427 print_children (Sym *parent)
428 {
429   Sym *child;
430   Arc *arc;
431 
432   sort_children (parent);
433   arc = parent->cg.children;
434 
435   for (arc = parent->cg.children; arc; arc = arc->next_child)
436     {
437       child = arc->child;
438       if (child == parent || (child->cg.cyc.num != 0
439 			      && child->cg.cyc.num == parent->cg.cyc.num))
440 	{
441 	  /* Self call or call to sibling.  */
442 	  printf (bsd_style_output
443 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
444 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
445 		  "", "", "", "", arc->count, "");
446 	  print_name (child);
447 	  printf ("\n");
448 	}
449       else
450 	{
451 	  /* Regular child of parent.  */
452 	  printf (bsd_style_output
453 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
454 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
455 		  "", "",
456 		  arc->time / hz, arc->child_time / hz,
457 		  arc->count, child->cg.cyc.head->ncalls);
458 	  print_name (child);
459 	  printf ("\n");
460 	}
461     }
462 }
463 
464 
465 static void
print_line(Sym * np)466 print_line (Sym *np)
467 {
468   char buf[BUFSIZ];
469 
470   sprintf (buf, "[%d]", np->cg.index);
471   printf (bsd_style_output
472 	  ? "%-6.6s %5.1f %7.2f %11.2f"
473 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
474 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
475 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
476 
477   if ((np->ncalls + np->cg.self_calls) != 0)
478     {
479       printf (" %7lu", np->ncalls);
480 
481       if (np->cg.self_calls != 0)
482 	  printf ("+%-7lu ", np->cg.self_calls);
483       else
484 	  printf (" %7.7s ", "");
485     }
486   else
487     {
488       printf (" %7.7s %7.7s ", "", "");
489     }
490 
491   print_name (np);
492   printf ("\n");
493 }
494 
495 
496 /* Print dynamic call graph.  */
497 
498 void
cg_print(Sym ** timesortsym)499 cg_print (Sym ** timesortsym)
500 {
501   unsigned int index;
502   Sym *parent;
503 
504   if (print_descriptions && bsd_style_output)
505     bsd_callg_blurb (stdout);
506 
507   print_header ();
508 
509   for (index = 0; index < symtab.len + num_cycles; ++index)
510     {
511       parent = timesortsym[index];
512 
513       if ((ignore_zeros && parent->ncalls == 0
514 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
515 	   && parent->cg.prop.child == 0)
516 	  || !parent->cg.print_flag
517 	  || (line_granularity && ! parent->is_func))
518 	continue;
519 
520       if (!parent->name && parent->cg.cyc.num != 0)
521 	{
522 	  /* Cycle header.  */
523 	  print_cycle (parent);
524 	  print_members (parent);
525 	}
526       else
527 	{
528 	  print_parents (parent);
529 	  print_line (parent);
530 	  print_children (parent);
531 	}
532 
533       if (bsd_style_output)
534 	printf ("\n");
535 
536       printf ("-----------------------------------------------\n");
537 
538       if (bsd_style_output)
539 	printf ("\n");
540     }
541 
542   free (timesortsym);
543 
544   if (print_descriptions && !bsd_style_output)
545     fsf_callg_blurb (stdout);
546 }
547 
548 
549 static int
cmp_name(const PTR left,const PTR right)550 cmp_name (const PTR left, const PTR right)
551 {
552   const Sym **npp1 = (const Sym **) left;
553   const Sym **npp2 = (const Sym **) right;
554 
555   return strcmp ((*npp1)->name, (*npp2)->name);
556 }
557 
558 
559 void
cg_print_index()560 cg_print_index ()
561 {
562   unsigned int index;
563   unsigned int nnames, todo, i, j;
564   int col, starting_col;
565   Sym **name_sorted_syms, *sym;
566   const char *filename;
567   char buf[20];
568   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
569 
570   /* Now, sort regular function name
571      alphabetically to create an index.  */
572   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
573 
574   for (index = 0, nnames = 0; index < symtab.len; index++)
575     {
576       if (ignore_zeros && symtab.base[index].ncalls == 0
577 	  && symtab.base[index].hist.time == 0)
578 	continue;
579 
580       name_sorted_syms[nnames++] = &symtab.base[index];
581     }
582 
583   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
584 
585   for (index = 1, todo = nnames; index <= num_cycles; index++)
586     name_sorted_syms[todo++] = &cycle_header[index];
587 
588   printf ("\f\n");
589   printf (_("Index by function name\n\n"));
590   index = (todo + 2) / 3;
591 
592   for (i = 0; i < index; i++)
593     {
594       col = 0;
595       starting_col = 0;
596 
597       for (j = i; j < todo; j += index)
598 	{
599 	  sym = name_sorted_syms[j];
600 
601 	  if (sym->cg.print_flag)
602 	    sprintf (buf, "[%d]", sym->cg.index);
603 	  else
604 	    sprintf (buf, "(%d)", sym->cg.index);
605 
606 	  if (j < nnames)
607 	    {
608 	      if (bsd_style_output)
609 		{
610 		  printf ("%6.6s %-19.19s", buf, sym->name);
611 		}
612 	      else
613 		{
614 		  col += strlen (buf);
615 
616 		  for (; col < starting_col + 5; ++col)
617 		    putchar (' ');
618 
619 		  printf (" %s ", buf);
620 		  col += print_name_only (sym);
621 
622 		  if (!line_granularity && sym->is_static && sym->file)
623 		    {
624 		      filename = sym->file->name;
625 
626 		      if (!print_path)
627 			{
628 			  filename = strrchr (filename, '/');
629 
630 			  if (filename)
631 			    ++filename;
632 			  else
633 			    filename = sym->file->name;
634 			}
635 
636 		      printf (" (%s)", filename);
637 		      col += strlen (filename) + 3;
638 		    }
639 		}
640 	    }
641 	  else
642 	    {
643 	      if (bsd_style_output)
644 		{
645 		  printf ("%6.6s ", buf);
646 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
647 		  printf ("%-19.19s", buf);
648 		}
649 	      else
650 		{
651 		  col += strlen (buf);
652 		  for (; col < starting_col + 5; ++col)
653 		    putchar (' ');
654 		  printf (" %s ", buf);
655 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
656 		  printf ("%s", buf);
657 		  col += strlen (buf);
658 		}
659 	    }
660 
661 	  starting_col += column_width;
662 	}
663 
664       printf ("\n");
665     }
666 
667   free (name_sorted_syms);
668 }
669 
670 /* Compare two arcs based on their usage counts.
671    We want to sort in descending order.  */
672 
673 static int
cmp_arc_count(const PTR left,const PTR right)674 cmp_arc_count (const PTR left, const PTR right)
675 {
676   const Arc **npp1 = (const Arc **) left;
677   const Arc **npp2 = (const Arc **) right;
678 
679   if ((*npp1)->count > (*npp2)->count)
680     return -1;
681   else if ((*npp1)->count < (*npp2)->count)
682     return 1;
683   else
684     return 0;
685 }
686 
687 /* Compare two funtions based on their usage counts.
688    We want to sort in descending order.  */
689 
690 static int
cmp_fun_nuses(const PTR left,const PTR right)691 cmp_fun_nuses (const PTR left, const PTR right)
692 {
693   const Sym **npp1 = (const Sym **) left;
694   const Sym **npp2 = (const Sym **) right;
695 
696   if ((*npp1)->nuses > (*npp2)->nuses)
697     return -1;
698   else if ((*npp1)->nuses < (*npp2)->nuses)
699     return 1;
700   else
701     return 0;
702 }
703 
704 /* Print a suggested function ordering based on the profiling data.
705 
706    We perform 4 major steps when ordering functions:
707 
708 	* Group unused functions together and place them at the
709 	end of the function order.
710 
711 	* Search the highest use arcs (those which account for 90% of
712 	the total arc count) for functions which have several parents.
713 
714 	Group those with the most call sites together (currently the
715 	top 1.25% which have at least five different call sites).
716 
717 	These are emitted at the start of the function order.
718 
719 	* Use a greedy placement algorithm to place functions which
720 	occur in the top 99% of the arcs in the profile.  Some provisions
721 	are made to handle high usage arcs where the parent and/or
722 	child has already been placed.
723 
724 	* Run the same greedy placement algorithm on the remaining
725 	arcs to place the leftover functions.
726 
727 
728    The various "magic numbers" should (one day) be tuneable by command
729    line options.  They were arrived at by benchmarking a few applications
730    with various values to see which values produced better overall function
731    orderings.
732 
733    Of course, profiling errors, machine limitations (PA long calls), and
734    poor cutoff values for the placement algorithm may limit the usefullness
735    of the resulting function order.  Improvements would be greatly appreciated.
736 
737    Suggestions:
738 
739 	* Place the functions with many callers near the middle of the
740 	list to reduce long calls.
741 
742 	* Propagate arc usage changes as functions are placed.  Ie if
743 	func1 and func2 are placed together, arcs to/from those arcs
744 	to the same parent/child should be combined, then resort the
745 	arcs to choose the next one.
746 
747 	* Implement some global positioning algorithm to place the
748 	chains made by the greedy local positioning algorithm.  Probably
749 	by examining arcs which haven't been placed yet to tie two
750 	chains together.
751 
752 	* Take a function's size and time into account in the algorithm;
753 	size in particular is important on the PA (long calls).  Placing
754 	many small functions onto their own page may be wise.
755 
756 	* Use better profiling information; many published algorithms
757 	are based on call sequences through time, rather than just
758 	arc counts.
759 
760 	* Prodecure cloning could improve performance when a small number
761 	of arcs account for most of the calls to a particular function.
762 
763 	* Use relocation information to avoid moving unused functions
764 	completely out of the code stream; this would avoid severe lossage
765 	when the profile data bears little resemblance to actual runs.
766 
767 	* Propagation of arc usages should also improve .o link line
768 	ordering which shares the same arc placement algorithm with
769 	the function ordering code (in fact it is a degenerate case
770 	of function ordering).  */
771 
772 void
cg_print_function_ordering()773 cg_print_function_ordering ()
774 {
775   unsigned long index, used, unused, scratch_index;
776   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
777 #ifdef __GNUC__
778   unsigned long long total_arcs, tmp_arcs_count;
779 #else
780   unsigned long total_arcs, tmp_arcs_count;
781 #endif
782   Sym **unused_syms, **used_syms, **scratch_syms;
783   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
784 
785   index = 0;
786   used = 0;
787   unused = 0;
788   scratch_index = 0;
789   unplaced_arc_count = 0;
790   high_arc_count = 0;
791   scratch_arc_count = 0;
792 
793   /* First group all the unused functions together.  */
794   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
795   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
796   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
797   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
798   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
799   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
800 
801   /* Walk through all the functions; mark those which are never
802      called as placed (we'll emit them as a group later).  */
803   for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
804     {
805       if (symtab.base[index].ncalls == 0)
806 	{
807 	  /* Filter out gprof generated names.  */
808 	  if (strcmp (symtab.base[index].name, "<locore>")
809 	      && strcmp (symtab.base[index].name, "<hicore>"))
810 	    {
811 	      unused_syms[unused++] = &symtab.base[index];
812 	      symtab.base[index].has_been_placed = 1;
813 	    }
814 	}
815       else
816 	{
817 	  used_syms[used++] = &symtab.base[index];
818 	  symtab.base[index].has_been_placed = 0;
819 	  symtab.base[index].next = 0;
820 	  symtab.base[index].prev = 0;
821 	  symtab.base[index].nuses = 0;
822 	}
823     }
824 
825   /* Sort the arcs from most used to least used.  */
826   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
827 
828   /* Compute the total arc count.  Also mark arcs as unplaced.
829 
830      Note we don't compensate for overflow if that happens!
831      Overflow is much less likely when this file is compiled
832      with GCC as it can double-wide integers via long long.  */
833   total_arcs = 0;
834   for (index = 0; index < numarcs; index++)
835     {
836       total_arcs += arcs[index]->count;
837       arcs[index]->has_been_placed = 0;
838     }
839 
840   /* We want to pull out those functions which are referenced
841      by many highly used arcs and emit them as a group.  This
842      could probably use some tuning.  */
843   tmp_arcs_count = 0;
844   for (index = 0; index < numarcs; index++)
845     {
846       tmp_arcs_count += arcs[index]->count;
847 
848       /* Count how many times each parent and child are used up
849 	 to our threshhold of arcs (90%).  */
850       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
851 	break;
852 
853       arcs[index]->child->nuses++;
854     }
855 
856   /* Now sort a temporary symbol table based on the number of
857      times each function was used in the highest used arcs.  */
858   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
859   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
860 
861   /* Now pick out those symbols we're going to emit as
862      a group.  We take up to 1.25% of the used symbols.  */
863   for (index = 0; index < used / 80; index++)
864     {
865       Sym *sym = scratch_syms[index];
866       Arc *arc;
867 
868       /* If we hit symbols that aren't used from many call sites,
869 	 then we can quit.  We choose five as the low limit for
870 	 no particular reason.  */
871       if (sym->nuses == 5)
872 	break;
873 
874       /* We're going to need the arcs between these functions.
875 	 Unfortunately, we don't know all these functions
876 	 until we're done.  So we keep track of all the arcs
877 	 to the functions we care about, then prune out those
878 	 which are uninteresting.
879 
880 	 An interesting variation would be to quit when we found
881 	 multi-call site functions which account for some percentage
882 	 of the arcs.  */
883       arc = sym->cg.children;
884 
885       while (arc)
886 	{
887 	  if (arc->parent != arc->child)
888 	    scratch_arcs[scratch_arc_count++] = arc;
889 	  arc->has_been_placed = 1;
890 	  arc = arc->next_child;
891 	}
892 
893       arc = sym->cg.parents;
894 
895       while (arc)
896 	{
897 	  if (arc->parent != arc->child)
898 	    scratch_arcs[scratch_arc_count++] = arc;
899 	  arc->has_been_placed = 1;
900 	  arc = arc->next_parent;
901 	}
902 
903       /* Keep track of how many symbols we're going to place.  */
904       scratch_index = index;
905 
906       /* A lie, but it makes identifying
907 	 these functions easier later.  */
908       sym->has_been_placed = 1;
909     }
910 
911   /* Now walk through the temporary arcs and copy
912      those we care about into the high arcs array.  */
913   for (index = 0; index < scratch_arc_count; index++)
914     {
915       Arc *arc = scratch_arcs[index];
916 
917       /* If this arc refers to highly used functions, then
918 	 then we want to keep it.  */
919       if (arc->child->has_been_placed
920 	  && arc->parent->has_been_placed)
921 	{
922 	  high_arcs[high_arc_count++] = scratch_arcs[index];
923 
924 	  /* We need to turn of has_been_placed since we're going to
925 	     use the main arc placement algorithm on these arcs.  */
926 	  arc->child->has_been_placed = 0;
927 	  arc->parent->has_been_placed = 0;
928 	}
929     }
930 
931   /* Dump the multi-site high usage functions which are not
932      going to be ordered by the main ordering algorithm.  */
933   for (index = 0; index < scratch_index; index++)
934     {
935       if (scratch_syms[index]->has_been_placed)
936 	printf ("%s\n", scratch_syms[index]->name);
937     }
938 
939   /* Now we can order the multi-site high use
940      functions based on the arcs between them.  */
941   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
942   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
943 				    unplaced_arcs, &unplaced_arc_count);
944 
945   /* Order and dump the high use functions left,
946      these typically have only a few call sites.  */
947   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
948 				    unplaced_arcs, &unplaced_arc_count);
949 
950   /* Now place the rarely used functions.  */
951   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
952 				    scratch_arcs, &scratch_arc_count);
953 
954   /* Output any functions not emitted by the order_and_dump calls.  */
955   for (index = 0; index < used; index++)
956     if (used_syms[index]->has_been_placed == 0)
957       printf("%s\n", used_syms[index]->name);
958 
959   /* Output the unused functions.  */
960   for (index = 0; index < unused; index++)
961     printf("%s\n", unused_syms[index]->name);
962 
963   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
964   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
967   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 
970   free (unused_syms);
971   free (used_syms);
972   free (scratch_syms);
973   free (high_arcs);
974   free (scratch_arcs);
975   free (unplaced_arcs);
976 }
977 
978 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
979    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
980 
981    If ALL is nonzero, then place all functions referenced by THE_ARCS,
982    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
983 
984 #define MOST 0.99
985 static void
order_and_dump_functions_by_arcs(the_arcs,arc_count,all,unplaced_arcs,unplaced_arc_count)986 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
987 				  unplaced_arcs, unplaced_arc_count)
988      Arc **the_arcs;
989      unsigned long arc_count;
990      int all;
991      Arc **unplaced_arcs;
992      unsigned long *unplaced_arc_count;
993 {
994 #ifdef __GNUC__
995   unsigned long long tmp_arcs, total_arcs;
996 #else
997   unsigned long tmp_arcs, total_arcs;
998 #endif
999   unsigned int index;
1000 
1001   /* If needed, compute the total arc count.
1002 
1003      Note we don't compensate for overflow if that happens!  */
1004   if (! all)
1005     {
1006       total_arcs = 0;
1007       for (index = 0; index < arc_count; index++)
1008 	total_arcs += the_arcs[index]->count;
1009     }
1010   else
1011     total_arcs = 0;
1012 
1013   tmp_arcs = 0;
1014 
1015   for (index = 0; index < arc_count; index++)
1016     {
1017       Sym *sym1, *sym2;
1018       Sym *child, *parent;
1019 
1020       tmp_arcs += the_arcs[index]->count;
1021 
1022       /* Ignore this arc if it's already been placed.  */
1023       if (the_arcs[index]->has_been_placed)
1024 	continue;
1025 
1026       child = the_arcs[index]->child;
1027       parent = the_arcs[index]->parent;
1028 
1029       /* If we're not using all arcs, and this is a rarely used
1030 	 arc, then put it on the unplaced_arc list.  Similarly
1031 	 if both the parent and child of this arc have been placed.  */
1032       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1033 	  || child->has_been_placed || parent->has_been_placed)
1034 	{
1035 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1036 	  continue;
1037 	}
1038 
1039       /* If all slots in the parent and child are full, then there isn't
1040 	 anything we can do right now.  We'll place this arc on the
1041 	 unplaced arc list in the hope that a global positioning
1042 	 algorithm can use it to place function chains.  */
1043       if (parent->next && parent->prev && child->next && child->prev)
1044 	{
1045 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1046 	  continue;
1047 	}
1048 
1049       /* If the parent is unattached, then find the closest
1050 	 place to attach it onto child's chain.   Similarly
1051 	 for the opposite case.  */
1052       if (!parent->next && !parent->prev)
1053 	{
1054 	  int next_count = 0;
1055 	  int prev_count = 0;
1056 	  Sym *prev = child;
1057 	  Sym *next = child;
1058 
1059 	  /* Walk to the beginning and end of the child's chain.  */
1060 	  while (next->next)
1061 	    {
1062 	      next = next->next;
1063 	      next_count++;
1064 	    }
1065 
1066 	  while (prev->prev)
1067 	    {
1068 	      prev = prev->prev;
1069 	      prev_count++;
1070 	    }
1071 
1072 	  /* Choose the closest.  */
1073 	  child = next_count < prev_count ? next : prev;
1074 	}
1075       else if (! child->next && !child->prev)
1076 	{
1077 	  int next_count = 0;
1078 	  int prev_count = 0;
1079 	  Sym *prev = parent;
1080 	  Sym *next = parent;
1081 
1082 	  while (next->next)
1083 	    {
1084 	      next = next->next;
1085 	      next_count++;
1086 	    }
1087 
1088 	  while (prev->prev)
1089 	    {
1090 	      prev = prev->prev;
1091 	      prev_count++;
1092 	    }
1093 
1094 	  parent = prev_count < next_count ? prev : next;
1095 	}
1096       else
1097 	{
1098 	  /* Couldn't find anywhere to attach the functions,
1099 	     put the arc on the unplaced arc list.  */
1100 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1101 	  continue;
1102 	}
1103 
1104       /* Make sure we don't tie two ends together.  */
1105       sym1 = parent;
1106       if (sym1->next)
1107 	while (sym1->next)
1108 	  sym1 = sym1->next;
1109       else
1110 	while (sym1->prev)
1111 	  sym1 = sym1->prev;
1112 
1113       sym2 = child;
1114       if (sym2->next)
1115 	while (sym2->next)
1116 	  sym2 = sym2->next;
1117       else
1118 	while (sym2->prev)
1119 	  sym2 = sym2->prev;
1120 
1121       if (sym1 == child
1122 	  && sym2 == parent)
1123 	{
1124 	  /* This would tie two ends together.  */
1125 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1126 	  continue;
1127 	}
1128 
1129       if (parent->next)
1130 	{
1131 	  /* Must attach to the parent's prev field.  */
1132 	  if (! child->next)
1133 	    {
1134 	      /* parent-prev and child-next */
1135 	      parent->prev = child;
1136 	      child->next = parent;
1137 	      the_arcs[index]->has_been_placed = 1;
1138 	    }
1139 	}
1140       else if (parent->prev)
1141 	{
1142 	  /* Must attach to the parent's next field.  */
1143 	  if (! child->prev)
1144 	    {
1145 	      /* parent-next and child-prev */
1146 	      parent->next = child;
1147 	      child->prev = parent;
1148 	      the_arcs[index]->has_been_placed = 1;
1149 	    }
1150 	}
1151       else
1152 	{
1153 	  /* Can attach to either field in the parent, depends
1154 	     on where we've got space in the child.  */
1155 	  if (child->prev)
1156 	    {
1157 	      /* parent-prev and child-next.  */
1158 	      parent->prev = child;
1159 	      child->next = parent;
1160 	      the_arcs[index]->has_been_placed = 1;
1161 	    }
1162 	  else
1163 	    {
1164 	      /* parent-next and child-prev.  */
1165 	      parent->next = child;
1166 	      child->prev = parent;
1167 	      the_arcs[index]->has_been_placed = 1;
1168 	    }
1169 	}
1170     }
1171 
1172   /* Dump the chains of functions we've made.  */
1173   for (index = 0; index < arc_count; index++)
1174     {
1175       Sym *sym;
1176       if (the_arcs[index]->parent->has_been_placed
1177 	  || the_arcs[index]->child->has_been_placed)
1178 	continue;
1179 
1180       sym = the_arcs[index]->parent;
1181 
1182       /* If this symbol isn't attached to any other
1183 	 symbols, then we've got a rarely used arc.
1184 
1185 	 Skip it for now, we'll deal with them later.  */
1186       if (sym->next == NULL
1187 	  && sym->prev == NULL)
1188 	continue;
1189 
1190       /* Get to the start of this chain.  */
1191       while (sym->prev)
1192 	sym = sym->prev;
1193 
1194       while (sym)
1195 	{
1196 	  /* Mark it as placed.  */
1197 	  sym->has_been_placed = 1;
1198 	  printf ("%s\n", sym->name);
1199 	  sym = sym->next;
1200 	}
1201     }
1202 
1203   /* If we want to place all the arcs, then output
1204      those which weren't placed by the main algorithm.  */
1205   if (all)
1206     for (index = 0; index < arc_count; index++)
1207       {
1208 	Sym *sym;
1209 	if (the_arcs[index]->parent->has_been_placed
1210 	    || the_arcs[index]->child->has_been_placed)
1211 	  continue;
1212 
1213 	sym = the_arcs[index]->parent;
1214 
1215 	sym->has_been_placed = 1;
1216 	printf ("%s\n", sym->name);
1217       }
1218 }
1219 
1220 /* Print a suggested .o ordering for files on a link line based
1221    on profiling information.  This uses the function placement
1222    code for the bulk of its work.  */
1223 
1224 void
cg_print_file_ordering()1225 cg_print_file_ordering ()
1226 {
1227   unsigned long scratch_arc_count, index;
1228   Arc **scratch_arcs;
1229   char *last;
1230 
1231   scratch_arc_count = 0;
1232 
1233   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1234   for (index = 0; index < numarcs; index++)
1235     {
1236       if (! arcs[index]->parent->mapped
1237 	  || ! arcs[index]->child->mapped)
1238 	arcs[index]->has_been_placed = 1;
1239     }
1240 
1241   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1242 				    scratch_arcs, &scratch_arc_count);
1243 
1244   /* Output .o's not handled by the main placement algorithm.  */
1245   for (index = 0; index < symtab.len; index++)
1246     {
1247       if (symtab.base[index].mapped
1248 	  && ! symtab.base[index].has_been_placed)
1249 	printf ("%s\n", symtab.base[index].name);
1250     }
1251 
1252   /* Now output any .o's that didn't have any text symbols.  */
1253   last = NULL;
1254   for (index = 0; index < symbol_map_count; index++)
1255     {
1256       unsigned int index2;
1257 
1258       /* Don't bother searching if this symbol
1259 	 is the same as the previous one.  */
1260       if (last && !strcmp (last, symbol_map[index].file_name))
1261 	continue;
1262 
1263       for (index2 = 0; index2 < symtab.len; index2++)
1264 	{
1265 	  if (! symtab.base[index2].mapped)
1266 	    continue;
1267 
1268 	  if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1269 	    break;
1270 	}
1271 
1272       /* If we didn't find it in the symbol table, then it must
1273 	 be a .o with no text symbols.  Output it last.  */
1274       if (index2 == symtab.len)
1275 	printf ("%s\n", symbol_map[index].file_name);
1276       last = symbol_map[index].file_name;
1277     }
1278 }
1279