xref: /openbsd/gnu/usr.bin/binutils/gprof/hist.c (revision 4cfece93)
1 /* hist.c  -  Histogram related operations.
2 
3    Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4 
5    This file is part of GNU Binutils.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20    02111-1307, USA.  */
21 
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "corefile.h"
28 #include "gmon_io.h"
29 #include "gmon_out.h"
30 #include "hist.h"
31 #include "sym_ids.h"
32 #include "utils.h"
33 
34 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
35 
36 static void scale_and_align_entries PARAMS ((void));
37 static void print_header PARAMS ((int));
38 static void print_line PARAMS ((Sym *, double));
39 static int cmp_time PARAMS ((const PTR, const PTR));
40 
41 /* Declarations of automatically generated functions to output blurbs.  */
42 extern void flat_blurb PARAMS ((FILE * fp));
43 
44 bfd_vma s_lowpc;		/* Lowest address in .text.  */
45 bfd_vma s_highpc = 0;		/* Highest address in .text.  */
46 bfd_vma lowpc, highpc;		/* Same, but expressed in UNITs.  */
47 int hist_num_bins = 0;		/* Number of histogram samples.  */
48 int *hist_sample = 0;		/* Histogram samples (shorts in the file!).  */
49 double hist_scale;
50 char hist_dimension[16] = "seconds";
51 char hist_dimension_abbrev = 's';
52 
53 static double accum_time;	/* Accumulated time so far for print_line(). */
54 static double total_time;	/* Total time for all routines.  */
55 
56 /* Table of SI prefixes for powers of 10 (used to automatically
57    scale some of the values in the flat profile).  */
58 const struct
59   {
60     char prefix;
61     double scale;
62   }
63 SItab[] =
64 {
65   { 'T', 1e-12 },				/* tera */
66   { 'G', 1e-09 },				/* giga */
67   { 'M', 1e-06 },				/* mega */
68   { 'K', 1e-03 },				/* kilo */
69   { ' ', 1e-00 },
70   { 'm', 1e+03 },				/* milli */
71   { 'u', 1e+06 },				/* micro */
72   { 'n', 1e+09 },				/* nano */
73   { 'p', 1e+12 },				/* pico */
74   { 'f', 1e+15 },				/* femto */
75   { 'a', 1e+18 }				/* ato */
76 };
77 
78 
79 /* Read the histogram from file IFP.  FILENAME is the name of IFP and
80    is provided for formatting error messages only.  */
81 
82 void
83 hist_read_rec (ifp, filename)
84      FILE * ifp;
85      const char *filename;
86 {
87   bfd_vma n_lowpc, n_highpc;
88   int i, ncnt, profrate;
89   UNIT count;
90 
91   if (gmon_io_read_vma (ifp, &n_lowpc)
92       || gmon_io_read_vma (ifp, &n_highpc)
93       || gmon_io_read_32 (ifp, &ncnt)
94       || gmon_io_read_32 (ifp, &profrate)
95       || gmon_io_read (ifp, hist_dimension, 15)
96       || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
97     {
98       fprintf (stderr, _("%s: %s: unexpected end of file\n"),
99 	       whoami, filename);
100 
101       done (1);
102     }
103 
104   if (!s_highpc)
105     {
106       /* This is the first histogram record.  */
107       s_lowpc = n_lowpc;
108       s_highpc = n_highpc;
109       lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
110       highpc = (bfd_vma) n_highpc / sizeof (UNIT);
111       hist_num_bins = ncnt;
112       hz = profrate;
113     }
114 
115   DBG (SAMPLEDEBUG,
116        printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n",
117 	       (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
118        printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n",
119 	       (unsigned long) s_lowpc, (unsigned long) s_highpc,
120 	       hist_num_bins);
121        printf ("[hist_read_rec]   lowpc 0x%lx   highpc 0x%lx\n",
122 	       (unsigned long) lowpc, (unsigned long) highpc));
123 
124   if (n_lowpc != s_lowpc || n_highpc != s_highpc
125       || ncnt != hist_num_bins || hz != profrate)
126     {
127       fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
128 	       whoami, filename);
129       done (1);
130     }
131 
132   if (!hist_sample)
133     {
134       hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
135       memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
136     }
137 
138   for (i = 0; i < hist_num_bins; ++i)
139     {
140       if (fread (&count[0], sizeof (count), 1, ifp) != 1)
141 	{
142 	  fprintf (stderr,
143 		  _("%s: %s: unexpected EOF after reading %d of %d samples\n"),
144 		   whoami, filename, i, hist_num_bins);
145 	  done (1);
146 	}
147       hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
148       DBG (SAMPLEDEBUG,
149 	   printf ("[hist_read_rec] 0x%lx: %u\n",
150 		   (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
151 		   hist_sample[i]));
152     }
153 }
154 
155 
156 /* Write execution histogram to file OFP.  FILENAME is the name
157    of OFP and is provided for formatting error-messages only.  */
158 
159 void
160 hist_write_hist (ofp, filename)
161      FILE * ofp;
162      const char *filename;
163 {
164   UNIT count;
165   int i;
166 
167   /* Write header.  */
168 
169   if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
170       || gmon_io_write_vma (ofp, s_lowpc)
171       || gmon_io_write_vma (ofp, s_highpc)
172       || gmon_io_write_32 (ofp, hist_num_bins)
173       || gmon_io_write_32 (ofp, hz)
174       || gmon_io_write (ofp, hist_dimension, 15)
175       || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
176     {
177       perror (filename);
178       done (1);
179     }
180 
181   for (i = 0; i < hist_num_bins; ++i)
182     {
183       bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
184 
185       if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
186 	{
187 	  perror (filename);
188 	  done (1);
189 	}
190     }
191 }
192 
193 
194 /* Calculate scaled entry point addresses (to save time in
195    hist_assign_samples), and, on architectures that have procedure
196    entry masks at the start of a function, possibly push the scaled
197    entry points over the procedure entry mask, if it turns out that
198    the entry point is in one bin and the code for a routine is in the
199    next bin.  */
200 
201 static void
202 scale_and_align_entries ()
203 {
204   Sym *sym;
205   bfd_vma bin_of_entry;
206   bfd_vma bin_of_code;
207 
208   for (sym = symtab.base; sym < symtab.limit; sym++)
209     {
210       sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
211       bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
212       bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
213 		     / hist_scale);
214       if (bin_of_entry < bin_of_code)
215 	{
216 	  DBG (SAMPLEDEBUG,
217 	       printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
218 		       (unsigned long) sym->hist.scaled_addr,
219 		       (unsigned long) (sym->hist.scaled_addr
220 					+ UNITS_TO_CODE)));
221 	  sym->hist.scaled_addr += UNITS_TO_CODE;
222 	}
223     }
224 }
225 
226 
227 /* Assign samples to the symbol to which they belong.
228 
229    Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
230    which may overlap one more symbol address ranges.  If a symbol
231    overlaps with the bin's address range by O percent, then O percent
232    of the bin's count is credited to that symbol.
233 
234    There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
235    with respect to the symbol's address range [SYM_LOW_PC,
236    SYM_HIGH_PC) as shown in the following diagram.  OVERLAP computes
237    the distance (in UNITs) between the arrows, the fraction of the
238    sample that is to be credited to the symbol which starts at
239    SYM_LOW_PC.
240 
241 	  sym_low_pc                                      sym_high_pc
242 	       |                                               |
243 	       v                                               v
244 
245 	       +-----------------------------------------------+
246 	       |                                               |
247 	  |  ->|    |<-         ->|         |<-         ->|    |<-  |
248 	  |         |             |         |             |         |
249 	  +---------+             +---------+             +---------+
250 
251 	  ^         ^             ^         ^             ^         ^
252 	  |         |             |         |             |         |
253      bin_low_pc bin_high_pc  bin_low_pc bin_high_pc  bin_low_pc bin_high_pc
254 
255    For the VAX we assert that samples will never fall in the first two
256    bytes of any routine, since that is the entry mask, thus we call
257    scale_and_align_entries() to adjust the entry points if the entry
258    mask falls in one bin but the code for the routine doesn't start
259    until the next bin.  In conjunction with the alignment of routine
260    addresses, this should allow us to have only one sample for every
261    four bytes of text space and never have any overlap (the two end
262    cases, above).  */
263 
264 void
265 hist_assign_samples ()
266 {
267   bfd_vma bin_low_pc, bin_high_pc;
268   bfd_vma sym_low_pc, sym_high_pc;
269   bfd_vma overlap, addr;
270   int bin_count, i;
271   unsigned int j;
272   double time, credit;
273 
274   /* Read samples and assign to symbols.  */
275   hist_scale = highpc - lowpc;
276   hist_scale /= hist_num_bins;
277   scale_and_align_entries ();
278 
279   /* Iterate over all sample bins.  */
280   for (i = 0, j = 1; i < hist_num_bins; ++i)
281     {
282       bin_count = hist_sample[i];
283       if (! bin_count)
284 	continue;
285 
286       bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
287       bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
288       time = bin_count;
289 
290       DBG (SAMPLEDEBUG,
291 	   printf (
292       "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n",
293 		    (unsigned long) (sizeof (UNIT) * bin_low_pc),
294 		    (unsigned long) (sizeof (UNIT) * bin_high_pc),
295 		    bin_count));
296       total_time += time;
297 
298       /* Credit all symbols that are covered by bin I.  */
299       for (j = j - 1; j < symtab.len; ++j)
300 	{
301 	  sym_low_pc = symtab.base[j].hist.scaled_addr;
302 	  sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
303 
304 	  /* If high end of bin is below entry address,
305 	     go for next bin.  */
306 	  if (bin_high_pc < sym_low_pc)
307 	    break;
308 
309 	  /* If low end of bin is above high end of symbol,
310 	     go for next symbol.  */
311 	  if (bin_low_pc >= sym_high_pc)
312 	    continue;
313 
314 	  overlap =
315 	    MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
316 	  if (overlap > 0)
317 	    {
318 	      DBG (SAMPLEDEBUG,
319 		   printf (
320 	       "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
321 			   (unsigned long) symtab.base[j].addr,
322 			   (unsigned long) (sizeof (UNIT) * sym_high_pc),
323 			   symtab.base[j].name, overlap * time / hist_scale,
324 			   (long) overlap));
325 
326 	      addr = symtab.base[j].addr;
327 	      credit = overlap * time / hist_scale;
328 
329 	      /* Credit symbol if it appears in INCL_FLAT or that
330 		 table is empty and it does not appear it in
331 		 EXCL_FLAT.  */
332 	      if (sym_lookup (&syms[INCL_FLAT], addr)
333 		  || (syms[INCL_FLAT].len == 0
334 		      && !sym_lookup (&syms[EXCL_FLAT], addr)))
335 		{
336 		  symtab.base[j].hist.time += credit;
337 		}
338 	      else
339 		{
340 		  total_time -= credit;
341 		}
342 	    }
343 	}
344     }
345 
346   DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
347 			    total_time));
348 }
349 
350 
351 /* Print header for flag histogram profile.  */
352 
353 static void
354 print_header (prefix)
355      int prefix;
356 {
357   char unit[64];
358 
359   sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
360 
361   if (bsd_style_output)
362     {
363       printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
364 	      (long) hist_scale * sizeof (UNIT));
365       if (total_time > 0.0)
366 	{
367 	  printf (_(" for %.2f%% of %.2f %s\n\n"),
368 		  100.0 / total_time, total_time / hz, hist_dimension);
369 	}
370     }
371   else
372     {
373       printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
374     }
375 
376   if (total_time <= 0.0)
377     {
378       printf (_(" no time accumulated\n\n"));
379 
380       /* This doesn't hurt since all the numerators will be zero.  */
381       total_time = 1.0;
382     }
383 
384   printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s  %-8.8s\n",
385 	  "%  ", _("cumulative"), _("self  "), "", _("self  "), _("total "),
386 	  "");
387   printf ("%5.5s %9.9s  %8.8s %8.8s %8.8s %8.8s  %-8.8s\n",
388 	  _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
389 	  _("name"));
390 }
391 
392 
393 static void
394 print_line (sym, scale)
395      Sym *sym;
396      double scale;
397 {
398   if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
399     return;
400 
401   accum_time += sym->hist.time;
402 
403   if (bsd_style_output)
404     printf ("%5.1f %10.2f %8.2f",
405 	    total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
406 	    accum_time / hz, sym->hist.time / hz);
407   else
408     printf ("%6.2f %9.2f %8.2f",
409 	    total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
410 	    accum_time / hz, sym->hist.time / hz);
411 
412   if (sym->ncalls != 0)
413     printf (" %8lu %8.2f %8.2f  ",
414 	    sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
415 	    scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
416   else
417     printf (" %8.8s %8.8s %8.8s  ", "", "", "");
418 
419   if (bsd_style_output)
420     print_name (sym);
421   else
422     print_name_only (sym);
423 
424   printf ("\n");
425 }
426 
427 
428 /* Compare LP and RP.  The primary comparison key is execution time,
429    the secondary is number of invocation, and the tertiary is the
430    lexicographic order of the function names.  */
431 
432 static int
433 cmp_time (lp, rp)
434      const PTR lp;
435      const PTR rp;
436 {
437   const Sym *left = *(const Sym **) lp;
438   const Sym *right = *(const Sym **) rp;
439   double time_diff;
440 
441   time_diff = right->hist.time - left->hist.time;
442 
443   if (time_diff > 0.0)
444     return 1;
445 
446   if (time_diff < 0.0)
447     return -1;
448 
449   if (right->ncalls > left->ncalls)
450     return 1;
451 
452   if (right->ncalls < left->ncalls)
453     return -1;
454 
455   return strcmp (left->name, right->name);
456 }
457 
458 
459 /* Print the flat histogram profile.  */
460 
461 void
462 hist_print ()
463 {
464   Sym **time_sorted_syms, *top_dog, *sym;
465   unsigned int index;
466   unsigned log_scale;
467   double top_time, time;
468   bfd_vma addr;
469 
470   if (first_output)
471     first_output = FALSE;
472   else
473     printf ("\f\n");
474 
475   accum_time = 0.0;
476 
477   if (bsd_style_output)
478     {
479       if (print_descriptions)
480 	{
481 	  printf (_("\n\n\nflat profile:\n"));
482 	  flat_blurb (stdout);
483 	}
484     }
485   else
486     {
487       printf (_("Flat profile:\n"));
488     }
489 
490   /* Sort the symbol table by time (call-count and name as secondary
491      and tertiary keys).  */
492   time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
493 
494   for (index = 0; index < symtab.len; ++index)
495     time_sorted_syms[index] = &symtab.base[index];
496 
497   qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
498 
499   if (bsd_style_output)
500     {
501       log_scale = 5;		/* Milli-seconds is BSD-default.  */
502     }
503   else
504     {
505       /* Search for symbol with highest per-call
506 	 execution time and scale accordingly.  */
507       log_scale = 0;
508       top_dog = 0;
509       top_time = 0.0;
510 
511       for (index = 0; index < symtab.len; ++index)
512 	{
513 	  sym = time_sorted_syms[index];
514 
515 	  if (sym->ncalls != 0)
516 	    {
517 	      time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
518 
519 	      if (time > top_time)
520 		{
521 		  top_dog = sym;
522 		  top_time = time;
523 		}
524 	    }
525 	}
526 
527       if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
528 	{
529 	  top_time /= hz;
530 
531 	  for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
532 	    {
533 	      double scaled_value = SItab[log_scale].scale * top_time;
534 
535 	      if (scaled_value >= 1.0 && scaled_value < 1000.0)
536 		break;
537 	    }
538 	}
539     }
540 
541   /* For now, the dimension is always seconds.  In the future, we
542      may also want to support other (pseudo-)dimensions (such as
543      I-cache misses etc.).  */
544   print_header (SItab[log_scale].prefix);
545 
546   for (index = 0; index < symtab.len; ++index)
547     {
548       addr = time_sorted_syms[index]->addr;
549 
550       /* Print symbol if its in INCL_FLAT table or that table
551 	is empty and the symbol is not in EXCL_FLAT.  */
552       if (sym_lookup (&syms[INCL_FLAT], addr)
553 	  || (syms[INCL_FLAT].len == 0
554 	      && !sym_lookup (&syms[EXCL_FLAT], addr)))
555 	print_line (time_sorted_syms[index], SItab[log_scale].scale);
556     }
557 
558   free (time_sorted_syms);
559 
560   if (print_descriptions && !bsd_style_output)
561     flat_blurb (stdout);
562 }
563