1 /* hist.c - Histogram related operations. 2 3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc. 4 5 This file is part of GNU Binutils. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 20 02111-1307, USA. */ 21 22 #include "libiberty.h" 23 #include "gprof.h" 24 #include "search_list.h" 25 #include "source.h" 26 #include "symtab.h" 27 #include "corefile.h" 28 #include "gmon_io.h" 29 #include "gmon_out.h" 30 #include "hist.h" 31 #include "sym_ids.h" 32 #include "utils.h" 33 34 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT)) 35 36 static void scale_and_align_entries PARAMS ((void)); 37 static void print_header PARAMS ((int)); 38 static void print_line PARAMS ((Sym *, double)); 39 static int cmp_time PARAMS ((const PTR, const PTR)); 40 41 /* Declarations of automatically generated functions to output blurbs. */ 42 extern void flat_blurb PARAMS ((FILE * fp)); 43 44 bfd_vma s_lowpc; /* Lowest address in .text. */ 45 bfd_vma s_highpc = 0; /* Highest address in .text. */ 46 bfd_vma lowpc, highpc; /* Same, but expressed in UNITs. */ 47 int hist_num_bins = 0; /* Number of histogram samples. */ 48 int *hist_sample = 0; /* Histogram samples (shorts in the file!). */ 49 double hist_scale; 50 char hist_dimension[16] = "seconds"; 51 char hist_dimension_abbrev = 's'; 52 53 static double accum_time; /* Accumulated time so far for print_line(). */ 54 static double total_time; /* Total time for all routines. */ 55 56 /* Table of SI prefixes for powers of 10 (used to automatically 57 scale some of the values in the flat profile). */ 58 const struct 59 { 60 char prefix; 61 double scale; 62 } 63 SItab[] = 64 { 65 { 'T', 1e-12 }, /* tera */ 66 { 'G', 1e-09 }, /* giga */ 67 { 'M', 1e-06 }, /* mega */ 68 { 'K', 1e-03 }, /* kilo */ 69 { ' ', 1e-00 }, 70 { 'm', 1e+03 }, /* milli */ 71 { 'u', 1e+06 }, /* micro */ 72 { 'n', 1e+09 }, /* nano */ 73 { 'p', 1e+12 }, /* pico */ 74 { 'f', 1e+15 }, /* femto */ 75 { 'a', 1e+18 } /* ato */ 76 }; 77 78 79 /* Read the histogram from file IFP. FILENAME is the name of IFP and 80 is provided for formatting error messages only. */ 81 82 void 83 hist_read_rec (ifp, filename) 84 FILE * ifp; 85 const char *filename; 86 { 87 bfd_vma n_lowpc, n_highpc; 88 int i, ncnt, profrate; 89 UNIT count; 90 91 if (gmon_io_read_vma (ifp, &n_lowpc) 92 || gmon_io_read_vma (ifp, &n_highpc) 93 || gmon_io_read_32 (ifp, &ncnt) 94 || gmon_io_read_32 (ifp, &profrate) 95 || gmon_io_read (ifp, hist_dimension, 15) 96 || gmon_io_read (ifp, &hist_dimension_abbrev, 1)) 97 { 98 fprintf (stderr, _("%s: %s: unexpected end of file\n"), 99 whoami, filename); 100 101 done (1); 102 } 103 104 if (!s_highpc) 105 { 106 /* This is the first histogram record. */ 107 s_lowpc = n_lowpc; 108 s_highpc = n_highpc; 109 lowpc = (bfd_vma) n_lowpc / sizeof (UNIT); 110 highpc = (bfd_vma) n_highpc / sizeof (UNIT); 111 hist_num_bins = ncnt; 112 hz = profrate; 113 } 114 115 DBG (SAMPLEDEBUG, 116 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n", 117 (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt); 118 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n", 119 (unsigned long) s_lowpc, (unsigned long) s_highpc, 120 hist_num_bins); 121 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n", 122 (unsigned long) lowpc, (unsigned long) highpc)); 123 124 if (n_lowpc != s_lowpc || n_highpc != s_highpc 125 || ncnt != hist_num_bins || hz != profrate) 126 { 127 fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"), 128 whoami, filename); 129 done (1); 130 } 131 132 if (!hist_sample) 133 { 134 hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0])); 135 memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0])); 136 } 137 138 for (i = 0; i < hist_num_bins; ++i) 139 { 140 if (fread (&count[0], sizeof (count), 1, ifp) != 1) 141 { 142 fprintf (stderr, 143 _("%s: %s: unexpected EOF after reading %d of %d samples\n"), 144 whoami, filename, i, hist_num_bins); 145 done (1); 146 } 147 hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]); 148 DBG (SAMPLEDEBUG, 149 printf ("[hist_read_rec] 0x%lx: %u\n", 150 (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt), 151 hist_sample[i])); 152 } 153 } 154 155 156 /* Write execution histogram to file OFP. FILENAME is the name 157 of OFP and is provided for formatting error-messages only. */ 158 159 void 160 hist_write_hist (ofp, filename) 161 FILE * ofp; 162 const char *filename; 163 { 164 UNIT count; 165 int i; 166 167 /* Write header. */ 168 169 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST) 170 || gmon_io_write_vma (ofp, s_lowpc) 171 || gmon_io_write_vma (ofp, s_highpc) 172 || gmon_io_write_32 (ofp, hist_num_bins) 173 || gmon_io_write_32 (ofp, hz) 174 || gmon_io_write (ofp, hist_dimension, 15) 175 || gmon_io_write (ofp, &hist_dimension_abbrev, 1)) 176 { 177 perror (filename); 178 done (1); 179 } 180 181 for (i = 0; i < hist_num_bins; ++i) 182 { 183 bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]); 184 185 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1) 186 { 187 perror (filename); 188 done (1); 189 } 190 } 191 } 192 193 194 /* Calculate scaled entry point addresses (to save time in 195 hist_assign_samples), and, on architectures that have procedure 196 entry masks at the start of a function, possibly push the scaled 197 entry points over the procedure entry mask, if it turns out that 198 the entry point is in one bin and the code for a routine is in the 199 next bin. */ 200 201 static void 202 scale_and_align_entries () 203 { 204 Sym *sym; 205 bfd_vma bin_of_entry; 206 bfd_vma bin_of_code; 207 208 for (sym = symtab.base; sym < symtab.limit; sym++) 209 { 210 sym->hist.scaled_addr = sym->addr / sizeof (UNIT); 211 bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale; 212 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc) 213 / hist_scale); 214 if (bin_of_entry < bin_of_code) 215 { 216 DBG (SAMPLEDEBUG, 217 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n", 218 (unsigned long) sym->hist.scaled_addr, 219 (unsigned long) (sym->hist.scaled_addr 220 + UNITS_TO_CODE))); 221 sym->hist.scaled_addr += UNITS_TO_CODE; 222 } 223 } 224 } 225 226 227 /* Assign samples to the symbol to which they belong. 228 229 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC) 230 which may overlap one more symbol address ranges. If a symbol 231 overlaps with the bin's address range by O percent, then O percent 232 of the bin's count is credited to that symbol. 233 234 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be 235 with respect to the symbol's address range [SYM_LOW_PC, 236 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes 237 the distance (in UNITs) between the arrows, the fraction of the 238 sample that is to be credited to the symbol which starts at 239 SYM_LOW_PC. 240 241 sym_low_pc sym_high_pc 242 | | 243 v v 244 245 +-----------------------------------------------+ 246 | | 247 | ->| |<- ->| |<- ->| |<- | 248 | | | | | | 249 +---------+ +---------+ +---------+ 250 251 ^ ^ ^ ^ ^ ^ 252 | | | | | | 253 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc 254 255 For the VAX we assert that samples will never fall in the first two 256 bytes of any routine, since that is the entry mask, thus we call 257 scale_and_align_entries() to adjust the entry points if the entry 258 mask falls in one bin but the code for the routine doesn't start 259 until the next bin. In conjunction with the alignment of routine 260 addresses, this should allow us to have only one sample for every 261 four bytes of text space and never have any overlap (the two end 262 cases, above). */ 263 264 void 265 hist_assign_samples () 266 { 267 bfd_vma bin_low_pc, bin_high_pc; 268 bfd_vma sym_low_pc, sym_high_pc; 269 bfd_vma overlap, addr; 270 int bin_count, i; 271 unsigned int j; 272 double time, credit; 273 274 /* Read samples and assign to symbols. */ 275 hist_scale = highpc - lowpc; 276 hist_scale /= hist_num_bins; 277 scale_and_align_entries (); 278 279 /* Iterate over all sample bins. */ 280 for (i = 0, j = 1; i < hist_num_bins; ++i) 281 { 282 bin_count = hist_sample[i]; 283 if (! bin_count) 284 continue; 285 286 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i); 287 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1)); 288 time = bin_count; 289 290 DBG (SAMPLEDEBUG, 291 printf ( 292 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n", 293 (unsigned long) (sizeof (UNIT) * bin_low_pc), 294 (unsigned long) (sizeof (UNIT) * bin_high_pc), 295 bin_count)); 296 total_time += time; 297 298 /* Credit all symbols that are covered by bin I. */ 299 for (j = j - 1; j < symtab.len; ++j) 300 { 301 sym_low_pc = symtab.base[j].hist.scaled_addr; 302 sym_high_pc = symtab.base[j + 1].hist.scaled_addr; 303 304 /* If high end of bin is below entry address, 305 go for next bin. */ 306 if (bin_high_pc < sym_low_pc) 307 break; 308 309 /* If low end of bin is above high end of symbol, 310 go for next symbol. */ 311 if (bin_low_pc >= sym_high_pc) 312 continue; 313 314 overlap = 315 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc); 316 if (overlap > 0) 317 { 318 DBG (SAMPLEDEBUG, 319 printf ( 320 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n", 321 (unsigned long) symtab.base[j].addr, 322 (unsigned long) (sizeof (UNIT) * sym_high_pc), 323 symtab.base[j].name, overlap * time / hist_scale, 324 (long) overlap)); 325 326 addr = symtab.base[j].addr; 327 credit = overlap * time / hist_scale; 328 329 /* Credit symbol if it appears in INCL_FLAT or that 330 table is empty and it does not appear it in 331 EXCL_FLAT. */ 332 if (sym_lookup (&syms[INCL_FLAT], addr) 333 || (syms[INCL_FLAT].len == 0 334 && !sym_lookup (&syms[EXCL_FLAT], addr))) 335 { 336 symtab.base[j].hist.time += credit; 337 } 338 else 339 { 340 total_time -= credit; 341 } 342 } 343 } 344 } 345 346 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n", 347 total_time)); 348 } 349 350 351 /* Print header for flag histogram profile. */ 352 353 static void 354 print_header (prefix) 355 int prefix; 356 { 357 char unit[64]; 358 359 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev); 360 361 if (bsd_style_output) 362 { 363 printf (_("\ngranularity: each sample hit covers %ld byte(s)"), 364 (long) hist_scale * sizeof (UNIT)); 365 if (total_time > 0.0) 366 { 367 printf (_(" for %.2f%% of %.2f %s\n\n"), 368 100.0 / total_time, total_time / hz, hist_dimension); 369 } 370 } 371 else 372 { 373 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension); 374 } 375 376 if (total_time <= 0.0) 377 { 378 printf (_(" no time accumulated\n\n")); 379 380 /* This doesn't hurt since all the numerators will be zero. */ 381 total_time = 1.0; 382 } 383 384 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n", 385 "% ", _("cumulative"), _("self "), "", _("self "), _("total "), 386 ""); 387 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n", 388 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit, 389 _("name")); 390 } 391 392 393 static void 394 print_line (sym, scale) 395 Sym *sym; 396 double scale; 397 { 398 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0) 399 return; 400 401 accum_time += sym->hist.time; 402 403 if (bsd_style_output) 404 printf ("%5.1f %10.2f %8.2f", 405 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0, 406 accum_time / hz, sym->hist.time / hz); 407 else 408 printf ("%6.2f %9.2f %8.2f", 409 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0, 410 accum_time / hz, sym->hist.time / hz); 411 412 if (sym->ncalls != 0) 413 printf (" %8lu %8.2f %8.2f ", 414 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls, 415 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls); 416 else 417 printf (" %8.8s %8.8s %8.8s ", "", "", ""); 418 419 if (bsd_style_output) 420 print_name (sym); 421 else 422 print_name_only (sym); 423 424 printf ("\n"); 425 } 426 427 428 /* Compare LP and RP. The primary comparison key is execution time, 429 the secondary is number of invocation, and the tertiary is the 430 lexicographic order of the function names. */ 431 432 static int 433 cmp_time (lp, rp) 434 const PTR lp; 435 const PTR rp; 436 { 437 const Sym *left = *(const Sym **) lp; 438 const Sym *right = *(const Sym **) rp; 439 double time_diff; 440 441 time_diff = right->hist.time - left->hist.time; 442 443 if (time_diff > 0.0) 444 return 1; 445 446 if (time_diff < 0.0) 447 return -1; 448 449 if (right->ncalls > left->ncalls) 450 return 1; 451 452 if (right->ncalls < left->ncalls) 453 return -1; 454 455 return strcmp (left->name, right->name); 456 } 457 458 459 /* Print the flat histogram profile. */ 460 461 void 462 hist_print () 463 { 464 Sym **time_sorted_syms, *top_dog, *sym; 465 unsigned int index; 466 unsigned log_scale; 467 double top_time, time; 468 bfd_vma addr; 469 470 if (first_output) 471 first_output = FALSE; 472 else 473 printf ("\f\n"); 474 475 accum_time = 0.0; 476 477 if (bsd_style_output) 478 { 479 if (print_descriptions) 480 { 481 printf (_("\n\n\nflat profile:\n")); 482 flat_blurb (stdout); 483 } 484 } 485 else 486 { 487 printf (_("Flat profile:\n")); 488 } 489 490 /* Sort the symbol table by time (call-count and name as secondary 491 and tertiary keys). */ 492 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 493 494 for (index = 0; index < symtab.len; ++index) 495 time_sorted_syms[index] = &symtab.base[index]; 496 497 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time); 498 499 if (bsd_style_output) 500 { 501 log_scale = 5; /* Milli-seconds is BSD-default. */ 502 } 503 else 504 { 505 /* Search for symbol with highest per-call 506 execution time and scale accordingly. */ 507 log_scale = 0; 508 top_dog = 0; 509 top_time = 0.0; 510 511 for (index = 0; index < symtab.len; ++index) 512 { 513 sym = time_sorted_syms[index]; 514 515 if (sym->ncalls != 0) 516 { 517 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls; 518 519 if (time > top_time) 520 { 521 top_dog = sym; 522 top_time = time; 523 } 524 } 525 } 526 527 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0) 528 { 529 top_time /= hz; 530 531 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++) 532 { 533 double scaled_value = SItab[log_scale].scale * top_time; 534 535 if (scaled_value >= 1.0 && scaled_value < 1000.0) 536 break; 537 } 538 } 539 } 540 541 /* For now, the dimension is always seconds. In the future, we 542 may also want to support other (pseudo-)dimensions (such as 543 I-cache misses etc.). */ 544 print_header (SItab[log_scale].prefix); 545 546 for (index = 0; index < symtab.len; ++index) 547 { 548 addr = time_sorted_syms[index]->addr; 549 550 /* Print symbol if its in INCL_FLAT table or that table 551 is empty and the symbol is not in EXCL_FLAT. */ 552 if (sym_lookup (&syms[INCL_FLAT], addr) 553 || (syms[INCL_FLAT].len == 0 554 && !sym_lookup (&syms[EXCL_FLAT], addr))) 555 print_line (time_sorted_syms[index], SItab[log_scale].scale); 556 } 557 558 free (time_sorted_syms); 559 560 if (print_descriptions && !bsd_style_output) 561 flat_blurb (stdout); 562 } 563