xref: /openbsd/gnu/usr.bin/binutils/gprof/cg_print.c (revision 274d7c50)
1 /* cg_print.c -  Print routines for displaying call graphs.
2 
3    Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4 
5    This file is part of GNU Binutils.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20    02111-1307, USA.  */
21 
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "cg_arcs.h"
28 #include "cg_print.h"
29 #include "hist.h"
30 #include "utils.h"
31 #include "corefile.h"
32 
33 /* Return value of comparison functions used to sort tables.  */
34 #define	LESSTHAN	-1
35 #define	EQUALTO		0
36 #define	GREATERTHAN	1
37 
38 static void print_header PARAMS ((void));
39 static void print_cycle PARAMS ((Sym *));
40 static int cmp_member PARAMS ((Sym *, Sym *));
41 static void sort_members PARAMS ((Sym *));
42 static void print_members PARAMS ((Sym *));
43 static int cmp_arc PARAMS ((Arc *, Arc *));
44 static void sort_parents PARAMS ((Sym *));
45 static void print_parents PARAMS ((Sym *));
46 static void sort_children PARAMS ((Sym *));
47 static void print_children PARAMS ((Sym *));
48 static void print_line PARAMS ((Sym *));
49 static int cmp_name PARAMS ((const PTR, const PTR));
50 static int cmp_arc_count PARAMS ((const PTR, const PTR));
51 static int cmp_fun_nuses PARAMS ((const PTR, const PTR));
52 static void order_and_dump_functions_by_arcs
53   PARAMS ((Arc **, unsigned long, int, Arc **, unsigned long *));
54 
55 /* Declarations of automatically generated functions to output blurbs.  */
56 extern void bsd_callg_blurb PARAMS ((FILE * fp));
57 extern void fsf_callg_blurb PARAMS ((FILE * fp));
58 
59 double print_time = 0.0;
60 
61 
62 static void
63 print_header ()
64 {
65   if (first_output)
66     first_output = FALSE;
67   else
68     printf ("\f\n");
69 
70   if (!bsd_style_output)
71     {
72       if (print_descriptions)
73 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
74       else
75 	printf (_("\t\t\tCall graph\n\n"));
76     }
77 
78   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
79 	  (long) hist_scale * sizeof (UNIT));
80 
81   if (print_time > 0.0)
82     printf (_(" for %.2f%% of %.2f seconds\n\n"),
83 	    100.0 / print_time, print_time / hz);
84   else
85     {
86       printf (_(" no time propagated\n\n"));
87 
88       /* This doesn't hurt, since all the numerators will be 0.0.  */
89       print_time = 1.0;
90     }
91 
92   if (bsd_style_output)
93     {
94       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
95 	      "", "", "", "", _("called"), _("total"), _("parents"));
96       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
97 	      _("index"), _("%time"), _("self"), _("descendants"),
98 	      _("called"), _("self"), _("name"), _("index"));
99       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
100 	      "", "", "", "", _("called"), _("total"), _("children"));
101       printf ("\n");
102     }
103   else
104     {
105       printf (_("index %% time    self  children    called     name\n"));
106     }
107 }
108 
109 /* Print a cycle header.  */
110 
111 static void
112 print_cycle (cyc)
113      Sym *cyc;
114 {
115   char buf[BUFSIZ];
116 
117   sprintf (buf, "[%d]", cyc->cg.index);
118   printf (bsd_style_output
119 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
121 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
122 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
123 
124   if (cyc->cg.self_calls != 0)
125     printf ("+%-7lu", cyc->cg.self_calls);
126   else
127     printf (" %7.7s", "");
128 
129   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
130 }
131 
132 /* Compare LEFT and RIGHT membmer.  Major comparison key is
133    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
134 
135 static int
136 cmp_member (left, right)
137      Sym *left;
138      Sym *right;
139 {
140   double left_time = left->cg.prop.self + left->cg.prop.child;
141   double right_time = right->cg.prop.self + right->cg.prop.child;
142   unsigned long left_calls = left->ncalls + left->cg.self_calls;
143   unsigned long right_calls = right->ncalls + right->cg.self_calls;
144 
145   if (left_time > right_time)
146     return GREATERTHAN;
147 
148   if (left_time < right_time)
149     return LESSTHAN;
150 
151   if (left_calls > right_calls)
152     return GREATERTHAN;
153 
154   if (left_calls < right_calls)
155     return LESSTHAN;
156 
157   return EQUALTO;
158 }
159 
160 /* Sort members of a cycle.  */
161 
162 static void
163 sort_members (cyc)
164      Sym *cyc;
165 {
166   Sym *todo, *doing, *prev;
167 
168   /* Detach cycle members from cyclehead,
169      and insertion sort them back on.  */
170   todo = cyc->cg.cyc.next;
171   cyc->cg.cyc.next = 0;
172 
173   for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
174     {
175       todo = doing->cg.cyc.next;
176 
177       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
178 	{
179 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
180 	    break;
181 	}
182 
183       doing->cg.cyc.next = prev->cg.cyc.next;
184       prev->cg.cyc.next = doing;
185     }
186 }
187 
188 /* Print the members of a cycle.  */
189 
190 static void
191 print_members (cyc)
192      Sym *cyc;
193 {
194   Sym *member;
195 
196   sort_members (cyc);
197 
198   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
199     {
200       printf (bsd_style_output
201 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 	      member->ncalls);
205 
206       if (member->cg.self_calls != 0)
207 	printf ("+%-7lu", member->cg.self_calls);
208       else
209 	printf (" %7.7s", "");
210 
211       printf ("     ");
212       print_name (member);
213       printf ("\n");
214     }
215 }
216 
217 /* Compare two arcs to/from the same child/parent.
218 	- if one arc is a self arc, it's least.
219 	- if one arc is within a cycle, it's less than.
220 	- if both arcs are within a cycle, compare arc counts.
221 	- if neither arc is within a cycle, compare with
222 		time + child_time as major key
223 		arc count as minor key.  */
224 
225 static int
226 cmp_arc (left, right)
227      Arc *left;
228      Arc *right;
229 {
230   Sym *left_parent = left->parent;
231   Sym *left_child = left->child;
232   Sym *right_parent = right->parent;
233   Sym *right_child = right->child;
234   double left_time, right_time;
235 
236   DBG (TIMEDEBUG,
237        printf ("[cmp_arc] ");
238        print_name (left_parent);
239        printf (" calls ");
240        print_name (left_child);
241        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
242 	       left->count, left_child->ncalls);
243        printf ("[cmp_arc] ");
244        print_name (right_parent);
245        printf (" calls ");
246        print_name (right_child);
247        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
248 	       right->count, right_child->ncalls);
249        printf ("\n");
250     );
251 
252   if (left_parent == left_child)
253     return LESSTHAN;		/* Left is a self call.  */
254 
255   if (right_parent == right_child)
256     return GREATERTHAN;		/* Right is a self call.  */
257 
258   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
259       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
260     {
261       /* Left is a call within a cycle.  */
262       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
263 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
264 	{
265 	  /* Right is a call within the cycle, too.  */
266 	  if (left->count < right->count)
267 	    return LESSTHAN;
268 
269 	  if (left->count > right->count)
270 	    return GREATERTHAN;
271 
272 	  return EQUALTO;
273 	}
274       else
275 	{
276 	  /* Right isn't a call within the cycle.  */
277 	  return LESSTHAN;
278 	}
279     }
280   else
281     {
282       /* Left isn't a call within a cycle.  */
283       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
284 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
285 	{
286 	  /* Right is a call within a cycle.  */
287 	  return GREATERTHAN;
288 	}
289       else
290 	{
291 	  /* Neither is a call within a cycle.  */
292 	  left_time = left->time + left->child_time;
293 	  right_time = right->time + right->child_time;
294 
295 	  if (left_time < right_time)
296 	    return LESSTHAN;
297 
298 	  if (left_time > right_time)
299 	    return GREATERTHAN;
300 
301 	  if (left->count < right->count)
302 	    return LESSTHAN;
303 
304 	  if (left->count > right->count)
305 	    return GREATERTHAN;
306 
307 	  return EQUALTO;
308 	}
309     }
310 }
311 
312 
313 static void
314 sort_parents (child)
315      Sym * child;
316 {
317   Arc *arc, *detached, sorted, *prev;
318 
319   /* Unlink parents from child, then insertion sort back on to
320      sorted's parents.
321 	  *arc        the arc you have detached and are inserting.
322 	  *detached   the rest of the arcs to be sorted.
323 	  sorted      arc list onto which you insertion sort.
324 	  *prev       arc before the arc you are comparing.  */
325   sorted.next_parent = 0;
326 
327   for (arc = child->cg.parents; arc; arc = detached)
328     {
329       detached = arc->next_parent;
330 
331       /* Consider *arc as disconnected; insert it into sorted.  */
332       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
333 	{
334 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
335 	    break;
336 	}
337 
338       arc->next_parent = prev->next_parent;
339       prev->next_parent = arc;
340     }
341 
342   /* Reattach sorted arcs to child.  */
343   child->cg.parents = sorted.next_parent;
344 }
345 
346 
347 static void
348 print_parents (child)
349      Sym *child;
350 {
351   Sym *parent;
352   Arc *arc;
353   Sym *cycle_head;
354 
355   if (child->cg.cyc.head != 0)
356     cycle_head = child->cg.cyc.head;
357   else
358     cycle_head = child;
359 
360   if (!child->cg.parents)
361     {
362       printf (bsd_style_output
363 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
364 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
365 	      "", "", "", "", "", "");
366       return;
367     }
368 
369   sort_parents (child);
370 
371   for (arc = child->cg.parents; arc; arc = arc->next_parent)
372     {
373       parent = arc->parent;
374       if (child == parent || (child->cg.cyc.num != 0
375 			      && parent->cg.cyc.num == child->cg.cyc.num))
376 	{
377 	  /* Selfcall or call among siblings.  */
378 	  printf (bsd_style_output
379 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
380 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
381 		  "", "", "", "",
382 		  arc->count, "");
383 	  print_name (parent);
384 	  printf ("\n");
385 	}
386       else
387 	{
388 	  /* Regular parent of child.  */
389 	  printf (bsd_style_output
390 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
391 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
392 		  "", "",
393 		  arc->time / hz, arc->child_time / hz,
394 		  arc->count, cycle_head->ncalls);
395 	  print_name (parent);
396 	  printf ("\n");
397 	}
398     }
399 }
400 
401 
402 static void
403 sort_children (parent)
404      Sym *parent;
405 {
406   Arc *arc, *detached, sorted, *prev;
407 
408   /* Unlink children from parent, then insertion sort back on to
409      sorted's children.
410 	  *arc        the arc you have detached and are inserting.
411 	  *detached   the rest of the arcs to be sorted.
412 	  sorted      arc list onto which you insertion sort.
413 	  *prev       arc before the arc you are comparing.  */
414   sorted.next_child = 0;
415 
416   for (arc = parent->cg.children; arc; arc = detached)
417     {
418       detached = arc->next_child;
419 
420       /* Consider *arc as disconnected; insert it into sorted.  */
421       for (prev = &sorted; prev->next_child; prev = prev->next_child)
422 	{
423 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
424 	    break;
425 	}
426 
427       arc->next_child = prev->next_child;
428       prev->next_child = arc;
429     }
430 
431   /* Reattach sorted children to parent.  */
432   parent->cg.children = sorted.next_child;
433 }
434 
435 
436 static void
437 print_children (parent)
438      Sym *parent;
439 {
440   Sym *child;
441   Arc *arc;
442 
443   sort_children (parent);
444   arc = parent->cg.children;
445 
446   for (arc = parent->cg.children; arc; arc = arc->next_child)
447     {
448       child = arc->child;
449       if (child == parent || (child->cg.cyc.num != 0
450 			      && child->cg.cyc.num == parent->cg.cyc.num))
451 	{
452 	  /* Self call or call to sibling.  */
453 	  printf (bsd_style_output
454 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
455 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
456 		  "", "", "", "", arc->count, "");
457 	  print_name (child);
458 	  printf ("\n");
459 	}
460       else
461 	{
462 	  /* Regular child of parent.  */
463 	  printf (bsd_style_output
464 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
465 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
466 		  "", "",
467 		  arc->time / hz, arc->child_time / hz,
468 		  arc->count, child->cg.cyc.head->ncalls);
469 	  print_name (child);
470 	  printf ("\n");
471 	}
472     }
473 }
474 
475 
476 static void
477 print_line (np)
478      Sym *np;
479 {
480   char buf[BUFSIZ];
481 
482   sprintf (buf, "[%d]", np->cg.index);
483   printf (bsd_style_output
484 	  ? "%-6.6s %5.1f %7.2f %11.2f"
485 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
486 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
487 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
488 
489   if ((np->ncalls + np->cg.self_calls) != 0)
490     {
491       printf (" %7lu", np->ncalls);
492 
493       if (np->cg.self_calls != 0)
494 	  printf ("+%-7lu ", np->cg.self_calls);
495       else
496 	  printf (" %7.7s ", "");
497     }
498   else
499     {
500       printf (" %7.7s %7.7s ", "", "");
501     }
502 
503   print_name (np);
504   printf ("\n");
505 }
506 
507 
508 /* Print dynamic call graph.  */
509 
510 void
511 cg_print (timesortsym)
512      Sym ** timesortsym;
513 {
514   unsigned int index;
515   Sym *parent;
516 
517   if (print_descriptions && bsd_style_output)
518     bsd_callg_blurb (stdout);
519 
520   print_header ();
521 
522   for (index = 0; index < symtab.len + num_cycles; ++index)
523     {
524       parent = timesortsym[index];
525 
526       if ((ignore_zeros && parent->ncalls == 0
527 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
528 	   && parent->cg.prop.child == 0)
529 	  || !parent->cg.print_flag
530 	  || (line_granularity && ! parent->is_func))
531 	continue;
532 
533       if (!parent->name && parent->cg.cyc.num != 0)
534 	{
535 	  /* Cycle header.  */
536 	  print_cycle (parent);
537 	  print_members (parent);
538 	}
539       else
540 	{
541 	  print_parents (parent);
542 	  print_line (parent);
543 	  print_children (parent);
544 	}
545 
546       if (bsd_style_output)
547 	printf ("\n");
548 
549       printf ("-----------------------------------------------\n");
550 
551       if (bsd_style_output)
552 	printf ("\n");
553     }
554 
555   free (timesortsym);
556 
557   if (print_descriptions && !bsd_style_output)
558     fsf_callg_blurb (stdout);
559 }
560 
561 
562 static int
563 cmp_name (left, right)
564      const PTR left;
565      const PTR right;
566 {
567   const Sym **npp1 = (const Sym **) left;
568   const Sym **npp2 = (const Sym **) right;
569 
570   return strcmp ((*npp1)->name, (*npp2)->name);
571 }
572 
573 
574 void
575 cg_print_index ()
576 {
577   unsigned int index;
578   unsigned int nnames, todo, i, j;
579   int col, starting_col;
580   Sym **name_sorted_syms, *sym;
581   const char *filename;
582   char buf[20];
583   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
584 
585   /* Now, sort regular function name
586      alphabetically to create an index.  */
587   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
588 
589   for (index = 0, nnames = 0; index < symtab.len; index++)
590     {
591       if (ignore_zeros && symtab.base[index].ncalls == 0
592 	  && symtab.base[index].hist.time == 0)
593 	continue;
594 
595       name_sorted_syms[nnames++] = &symtab.base[index];
596     }
597 
598   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
599 
600   for (index = 1, todo = nnames; index <= num_cycles; index++)
601     name_sorted_syms[todo++] = &cycle_header[index];
602 
603   printf ("\f\n");
604   printf (_("Index by function name\n\n"));
605   index = (todo + 2) / 3;
606 
607   for (i = 0; i < index; i++)
608     {
609       col = 0;
610       starting_col = 0;
611 
612       for (j = i; j < todo; j += index)
613 	{
614 	  sym = name_sorted_syms[j];
615 
616 	  if (sym->cg.print_flag)
617 	    sprintf (buf, "[%d]", sym->cg.index);
618 	  else
619 	    sprintf (buf, "(%d)", sym->cg.index);
620 
621 	  if (j < nnames)
622 	    {
623 	      if (bsd_style_output)
624 		{
625 		  printf ("%6.6s %-19.19s", buf, sym->name);
626 		}
627 	      else
628 		{
629 		  col += strlen (buf);
630 
631 		  for (; col < starting_col + 5; ++col)
632 		    putchar (' ');
633 
634 		  printf (" %s ", buf);
635 		  col += print_name_only (sym);
636 
637 		  if (!line_granularity && sym->is_static && sym->file)
638 		    {
639 		      filename = sym->file->name;
640 
641 		      if (!print_path)
642 			{
643 			  filename = strrchr (filename, '/');
644 
645 			  if (filename)
646 			    ++filename;
647 			  else
648 			    filename = sym->file->name;
649 			}
650 
651 		      printf (" (%s)", filename);
652 		      col += strlen (filename) + 3;
653 		    }
654 		}
655 	    }
656 	  else
657 	    {
658 	      if (bsd_style_output)
659 		{
660 		  printf ("%6.6s ", buf);
661 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
662 		  printf ("%-19.19s", buf);
663 		}
664 	      else
665 		{
666 		  col += strlen (buf);
667 		  for (; col < starting_col + 5; ++col)
668 		    putchar (' ');
669 		  printf (" %s ", buf);
670 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
671 		  printf ("%s", buf);
672 		  col += strlen (buf);
673 		}
674 	    }
675 
676 	  starting_col += column_width;
677 	}
678 
679       printf ("\n");
680     }
681 
682   free (name_sorted_syms);
683 }
684 
685 /* Compare two arcs based on their usage counts.
686    We want to sort in descending order.  */
687 
688 static int
689 cmp_arc_count (left, right)
690      const PTR left;
691      const PTR right;
692 {
693   const Arc **npp1 = (const Arc **) left;
694   const Arc **npp2 = (const Arc **) right;
695 
696   if ((*npp1)->count > (*npp2)->count)
697     return -1;
698   else if ((*npp1)->count < (*npp2)->count)
699     return 1;
700   else
701     return 0;
702 }
703 
704 /* Compare two funtions based on their usage counts.
705    We want to sort in descending order.  */
706 
707 static int
708 cmp_fun_nuses (left, right)
709      const PTR left;
710      const PTR right;
711 {
712   const Sym **npp1 = (const Sym **) left;
713   const Sym **npp2 = (const Sym **) right;
714 
715   if ((*npp1)->nuses > (*npp2)->nuses)
716     return -1;
717   else if ((*npp1)->nuses < (*npp2)->nuses)
718     return 1;
719   else
720     return 0;
721 }
722 
723 /* Print a suggested function ordering based on the profiling data.
724 
725    We perform 4 major steps when ordering functions:
726 
727 	* Group unused functions together and place them at the
728 	end of the function order.
729 
730 	* Search the highest use arcs (those which account for 90% of
731 	the total arc count) for functions which have several parents.
732 
733 	Group those with the most call sites together (currently the
734 	top 1.25% which have at least five different call sites).
735 
736 	These are emitted at the start of the function order.
737 
738 	* Use a greedy placement algorithm to place functions which
739 	occur in the top 99% of the arcs in the profile.  Some provisions
740 	are made to handle high usage arcs where the parent and/or
741 	child has already been placed.
742 
743 	* Run the same greedy placement algorithm on the remaining
744 	arcs to place the leftover functions.
745 
746 
747    The various "magic numbers" should (one day) be tuneable by command
748    line options.  They were arrived at by benchmarking a few applications
749    with various values to see which values produced better overall function
750    orderings.
751 
752    Of course, profiling errors, machine limitations (PA long calls), and
753    poor cutoff values for the placement algorithm may limit the usefullness
754    of the resulting function order.  Improvements would be greatly appreciated.
755 
756    Suggestions:
757 
758 	* Place the functions with many callers near the middle of the
759 	list to reduce long calls.
760 
761 	* Propagate arc usage changes as functions are placed.  Ie if
762 	func1 and func2 are placed together, arcs to/from those arcs
763 	to the same parent/child should be combined, then resort the
764 	arcs to choose the next one.
765 
766 	* Implement some global positioning algorithm to place the
767 	chains made by the greedy local positioning algorithm.  Probably
768 	by examining arcs which haven't been placed yet to tie two
769 	chains together.
770 
771 	* Take a function's size and time into account in the algorithm;
772 	size in particular is important on the PA (long calls).  Placing
773 	many small functions onto their own page may be wise.
774 
775 	* Use better profiling information; many published algorithms
776 	are based on call sequences through time, rather than just
777 	arc counts.
778 
779 	* Prodecure cloning could improve performance when a small number
780 	of arcs account for most of the calls to a particular function.
781 
782 	* Use relocation information to avoid moving unused functions
783 	completely out of the code stream; this would avoid severe lossage
784 	when the profile data bears little resemblance to actual runs.
785 
786 	* Propagation of arc usages should also improve .o link line
787 	ordering which shares the same arc placement algorithm with
788 	the function ordering code (in fact it is a degenerate case
789 	of function ordering).  */
790 
791 void
792 cg_print_function_ordering ()
793 {
794   unsigned long index, used, unused, scratch_index;
795   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
796 #ifdef __GNUC__
797   unsigned long long total_arcs, tmp_arcs_count;
798 #else
799   unsigned long total_arcs, tmp_arcs_count;
800 #endif
801   Sym **unused_syms, **used_syms, **scratch_syms;
802   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
803 
804   index = 0;
805   used = 0;
806   unused = 0;
807   scratch_index = 0;
808   unplaced_arc_count = 0;
809   high_arc_count = 0;
810   scratch_arc_count = 0;
811 
812   /* First group all the unused functions together.  */
813   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
814   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
815   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
816   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
817   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
818   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
819 
820   /* Walk through all the functions; mark those which are never
821      called as placed (we'll emit them as a group later).  */
822   for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
823     {
824       if (symtab.base[index].ncalls == 0)
825 	{
826 	  /* Filter out gprof generated names.  */
827 	  if (strcmp (symtab.base[index].name, "<locore>")
828 	      && strcmp (symtab.base[index].name, "<hicore>"))
829 	    {
830 	      unused_syms[unused++] = &symtab.base[index];
831 	      symtab.base[index].has_been_placed = 1;
832 	    }
833 	}
834       else
835 	{
836 	  used_syms[used++] = &symtab.base[index];
837 	  symtab.base[index].has_been_placed = 0;
838 	  symtab.base[index].next = 0;
839 	  symtab.base[index].prev = 0;
840 	  symtab.base[index].nuses = 0;
841 	}
842     }
843 
844   /* Sort the arcs from most used to least used.  */
845   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
846 
847   /* Compute the total arc count.  Also mark arcs as unplaced.
848 
849      Note we don't compensate for overflow if that happens!
850      Overflow is much less likely when this file is compiled
851      with GCC as it can double-wide integers via long long.  */
852   total_arcs = 0;
853   for (index = 0; index < numarcs; index++)
854     {
855       total_arcs += arcs[index]->count;
856       arcs[index]->has_been_placed = 0;
857     }
858 
859   /* We want to pull out those functions which are referenced
860      by many highly used arcs and emit them as a group.  This
861      could probably use some tuning.  */
862   tmp_arcs_count = 0;
863   for (index = 0; index < numarcs; index++)
864     {
865       tmp_arcs_count += arcs[index]->count;
866 
867       /* Count how many times each parent and child are used up
868 	 to our threshhold of arcs (90%).  */
869       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
870 	break;
871 
872       arcs[index]->child->nuses++;
873     }
874 
875   /* Now sort a temporary symbol table based on the number of
876      times each function was used in the highest used arcs.  */
877   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
878   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
879 
880   /* Now pick out those symbols we're going to emit as
881      a group.  We take up to 1.25% of the used symbols.  */
882   for (index = 0; index < used / 80; index++)
883     {
884       Sym *sym = scratch_syms[index];
885       Arc *arc;
886 
887       /* If we hit symbols that aren't used from many call sites,
888 	 then we can quit.  We choose five as the low limit for
889 	 no particular reason.  */
890       if (sym->nuses == 5)
891 	break;
892 
893       /* We're going to need the arcs between these functions.
894 	 Unfortunately, we don't know all these functions
895 	 until we're done.  So we keep track of all the arcs
896 	 to the functions we care about, then prune out those
897 	 which are uninteresting.
898 
899 	 An interesting variation would be to quit when we found
900 	 multi-call site functions which account for some percentage
901 	 of the arcs.  */
902       arc = sym->cg.children;
903 
904       while (arc)
905 	{
906 	  if (arc->parent != arc->child)
907 	    scratch_arcs[scratch_arc_count++] = arc;
908 	  arc->has_been_placed = 1;
909 	  arc = arc->next_child;
910 	}
911 
912       arc = sym->cg.parents;
913 
914       while (arc)
915 	{
916 	  if (arc->parent != arc->child)
917 	    scratch_arcs[scratch_arc_count++] = arc;
918 	  arc->has_been_placed = 1;
919 	  arc = arc->next_parent;
920 	}
921 
922       /* Keep track of how many symbols we're going to place.  */
923       scratch_index = index;
924 
925       /* A lie, but it makes identifying
926 	 these functions easier later.  */
927       sym->has_been_placed = 1;
928     }
929 
930   /* Now walk through the temporary arcs and copy
931      those we care about into the high arcs array.  */
932   for (index = 0; index < scratch_arc_count; index++)
933     {
934       Arc *arc = scratch_arcs[index];
935 
936       /* If this arc refers to highly used functions, then
937 	 then we want to keep it.  */
938       if (arc->child->has_been_placed
939 	  && arc->parent->has_been_placed)
940 	{
941 	  high_arcs[high_arc_count++] = scratch_arcs[index];
942 
943 	  /* We need to turn of has_been_placed since we're going to
944 	     use the main arc placement algorithm on these arcs.  */
945 	  arc->child->has_been_placed = 0;
946 	  arc->parent->has_been_placed = 0;
947 	}
948     }
949 
950   /* Dump the multi-site high usage functions which are not
951      going to be ordered by the main ordering algorithm.  */
952   for (index = 0; index < scratch_index; index++)
953     {
954       if (scratch_syms[index]->has_been_placed)
955 	printf ("%s\n", scratch_syms[index]->name);
956     }
957 
958   /* Now we can order the multi-site high use
959      functions based on the arcs between them.  */
960   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
961   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
962 				    unplaced_arcs, &unplaced_arc_count);
963 
964   /* Order and dump the high use functions left,
965      these typically have only a few call sites.  */
966   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
967 				    unplaced_arcs, &unplaced_arc_count);
968 
969   /* Now place the rarely used functions.  */
970   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
971 				    scratch_arcs, &scratch_arc_count);
972 
973   /* Output any functions not emitted by the order_and_dump calls.  */
974   for (index = 0; index < used; index++)
975     if (used_syms[index]->has_been_placed == 0)
976       printf("%s\n", used_syms[index]->name);
977 
978   /* Output the unused functions.  */
979   for (index = 0; index < unused; index++)
980     printf("%s\n", unused_syms[index]->name);
981 
982   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
983   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
984   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
985   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
986   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
987   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
988 
989   free (unused_syms);
990   free (used_syms);
991   free (scratch_syms);
992   free (high_arcs);
993   free (scratch_arcs);
994   free (unplaced_arcs);
995 }
996 
997 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
998    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
999 
1000    If ALL is nonzero, then place all functions referenced by THE_ARCS,
1001    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
1002 
1003 #define MOST 0.99
1004 static void
1005 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
1006 				  unplaced_arcs, unplaced_arc_count)
1007      Arc **the_arcs;
1008      unsigned long arc_count;
1009      int all;
1010      Arc **unplaced_arcs;
1011      unsigned long *unplaced_arc_count;
1012 {
1013 #ifdef __GNUC__
1014   unsigned long long tmp_arcs, total_arcs;
1015 #else
1016   unsigned long tmp_arcs, total_arcs;
1017 #endif
1018   unsigned int index;
1019 
1020   /* If needed, compute the total arc count.
1021 
1022      Note we don't compensate for overflow if that happens!  */
1023   if (! all)
1024     {
1025       total_arcs = 0;
1026       for (index = 0; index < arc_count; index++)
1027 	total_arcs += the_arcs[index]->count;
1028     }
1029   else
1030     total_arcs = 0;
1031 
1032   tmp_arcs = 0;
1033 
1034   for (index = 0; index < arc_count; index++)
1035     {
1036       Sym *sym1, *sym2;
1037       Sym *child, *parent;
1038 
1039       tmp_arcs += the_arcs[index]->count;
1040 
1041       /* Ignore this arc if it's already been placed.  */
1042       if (the_arcs[index]->has_been_placed)
1043 	continue;
1044 
1045       child = the_arcs[index]->child;
1046       parent = the_arcs[index]->parent;
1047 
1048       /* If we're not using all arcs, and this is a rarely used
1049 	 arc, then put it on the unplaced_arc list.  Similarly
1050 	 if both the parent and child of this arc have been placed.  */
1051       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1052 	  || child->has_been_placed || parent->has_been_placed)
1053 	{
1054 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1055 	  continue;
1056 	}
1057 
1058       /* If all slots in the parent and child are full, then there isn't
1059 	 anything we can do right now.  We'll place this arc on the
1060 	 unplaced arc list in the hope that a global positioning
1061 	 algorithm can use it to place function chains.  */
1062       if (parent->next && parent->prev && child->next && child->prev)
1063 	{
1064 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1065 	  continue;
1066 	}
1067 
1068       /* If the parent is unattached, then find the closest
1069 	 place to attach it onto child's chain.   Similarly
1070 	 for the opposite case.  */
1071       if (!parent->next && !parent->prev)
1072 	{
1073 	  int next_count = 0;
1074 	  int prev_count = 0;
1075 	  Sym *prev = child;
1076 	  Sym *next = child;
1077 
1078 	  /* Walk to the beginning and end of the child's chain.  */
1079 	  while (next->next)
1080 	    {
1081 	      next = next->next;
1082 	      next_count++;
1083 	    }
1084 
1085 	  while (prev->prev)
1086 	    {
1087 	      prev = prev->prev;
1088 	      prev_count++;
1089 	    }
1090 
1091 	  /* Choose the closest.  */
1092 	  child = next_count < prev_count ? next : prev;
1093 	}
1094       else if (! child->next && !child->prev)
1095 	{
1096 	  int next_count = 0;
1097 	  int prev_count = 0;
1098 	  Sym *prev = parent;
1099 	  Sym *next = parent;
1100 
1101 	  while (next->next)
1102 	    {
1103 	      next = next->next;
1104 	      next_count++;
1105 	    }
1106 
1107 	  while (prev->prev)
1108 	    {
1109 	      prev = prev->prev;
1110 	      prev_count++;
1111 	    }
1112 
1113 	  parent = prev_count < next_count ? prev : next;
1114 	}
1115       else
1116 	{
1117 	  /* Couldn't find anywhere to attach the functions,
1118 	     put the arc on the unplaced arc list.  */
1119 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1120 	  continue;
1121 	}
1122 
1123       /* Make sure we don't tie two ends together.  */
1124       sym1 = parent;
1125       if (sym1->next)
1126 	while (sym1->next)
1127 	  sym1 = sym1->next;
1128       else
1129 	while (sym1->prev)
1130 	  sym1 = sym1->prev;
1131 
1132       sym2 = child;
1133       if (sym2->next)
1134 	while (sym2->next)
1135 	  sym2 = sym2->next;
1136       else
1137 	while (sym2->prev)
1138 	  sym2 = sym2->prev;
1139 
1140       if (sym1 == child
1141 	  && sym2 == parent)
1142 	{
1143 	  /* This would tie two ends together.  */
1144 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1145 	  continue;
1146 	}
1147 
1148       if (parent->next)
1149 	{
1150 	  /* Must attach to the parent's prev field.  */
1151 	  if (! child->next)
1152 	    {
1153 	      /* parent-prev and child-next */
1154 	      parent->prev = child;
1155 	      child->next = parent;
1156 	      the_arcs[index]->has_been_placed = 1;
1157 	    }
1158 	}
1159       else if (parent->prev)
1160 	{
1161 	  /* Must attach to the parent's next field.  */
1162 	  if (! child->prev)
1163 	    {
1164 	      /* parent-next and child-prev */
1165 	      parent->next = child;
1166 	      child->prev = parent;
1167 	      the_arcs[index]->has_been_placed = 1;
1168 	    }
1169 	}
1170       else
1171 	{
1172 	  /* Can attach to either field in the parent, depends
1173 	     on where we've got space in the child.  */
1174 	  if (child->prev)
1175 	    {
1176 	      /* parent-prev and child-next.  */
1177 	      parent->prev = child;
1178 	      child->next = parent;
1179 	      the_arcs[index]->has_been_placed = 1;
1180 	    }
1181 	  else
1182 	    {
1183 	      /* parent-next and child-prev.  */
1184 	      parent->next = child;
1185 	      child->prev = parent;
1186 	      the_arcs[index]->has_been_placed = 1;
1187 	    }
1188 	}
1189     }
1190 
1191   /* Dump the chains of functions we've made.  */
1192   for (index = 0; index < arc_count; index++)
1193     {
1194       Sym *sym;
1195       if (the_arcs[index]->parent->has_been_placed
1196 	  || the_arcs[index]->child->has_been_placed)
1197 	continue;
1198 
1199       sym = the_arcs[index]->parent;
1200 
1201       /* If this symbol isn't attached to any other
1202 	 symbols, then we've got a rarely used arc.
1203 
1204 	 Skip it for now, we'll deal with them later.  */
1205       if (sym->next == NULL
1206 	  && sym->prev == NULL)
1207 	continue;
1208 
1209       /* Get to the start of this chain.  */
1210       while (sym->prev)
1211 	sym = sym->prev;
1212 
1213       while (sym)
1214 	{
1215 	  /* Mark it as placed.  */
1216 	  sym->has_been_placed = 1;
1217 	  printf ("%s\n", sym->name);
1218 	  sym = sym->next;
1219 	}
1220     }
1221 
1222   /* If we want to place all the arcs, then output
1223      those which weren't placed by the main algorithm.  */
1224   if (all)
1225     for (index = 0; index < arc_count; index++)
1226       {
1227 	Sym *sym;
1228 	if (the_arcs[index]->parent->has_been_placed
1229 	    || the_arcs[index]->child->has_been_placed)
1230 	  continue;
1231 
1232 	sym = the_arcs[index]->parent;
1233 
1234 	sym->has_been_placed = 1;
1235 	printf ("%s\n", sym->name);
1236       }
1237 }
1238 
1239 /* Print a suggested .o ordering for files on a link line based
1240    on profiling information.  This uses the function placement
1241    code for the bulk of its work.  */
1242 
1243 void
1244 cg_print_file_ordering ()
1245 {
1246   unsigned long scratch_arc_count, index;
1247   Arc **scratch_arcs;
1248   char *last;
1249 
1250   scratch_arc_count = 0;
1251 
1252   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1253   for (index = 0; index < numarcs; index++)
1254     {
1255       if (! arcs[index]->parent->mapped
1256 	  || ! arcs[index]->child->mapped)
1257 	arcs[index]->has_been_placed = 1;
1258     }
1259 
1260   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1261 				    scratch_arcs, &scratch_arc_count);
1262 
1263   /* Output .o's not handled by the main placement algorithm.  */
1264   for (index = 0; index < symtab.len; index++)
1265     {
1266       if (symtab.base[index].mapped
1267 	  && ! symtab.base[index].has_been_placed)
1268 	printf ("%s\n", symtab.base[index].name);
1269     }
1270 
1271   /* Now output any .o's that didn't have any text symbols.  */
1272   last = NULL;
1273   for (index = 0; index < symbol_map_count; index++)
1274     {
1275       unsigned int index2;
1276 
1277       /* Don't bother searching if this symbol
1278 	 is the same as the previous one.  */
1279       if (last && !strcmp (last, symbol_map[index].file_name))
1280 	continue;
1281 
1282       for (index2 = 0; index2 < symtab.len; index2++)
1283 	{
1284 	  if (! symtab.base[index2].mapped)
1285 	    continue;
1286 
1287 	  if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1288 	    break;
1289 	}
1290 
1291       /* If we didn't find it in the symbol table, then it must
1292 	 be a .o with no text symbols.  Output it last.  */
1293       if (index2 == symtab.len)
1294 	printf ("%s\n", symbol_map[index].file_name);
1295       last = symbol_map[index].file_name;
1296     }
1297 }
1298