1 /* cg_print.c - Print routines for displaying call graphs. 2 3 Copyright (C) 2000-2016 Free Software Foundation, Inc. 4 5 This file is part of GNU Binutils. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 20 02110-1301, USA. */ 21 22 #include "gprof.h" 23 #include "libiberty.h" 24 #include "filenames.h" 25 #include "search_list.h" 26 #include "source.h" 27 #include "symtab.h" 28 #include "cg_arcs.h" 29 #include "cg_print.h" 30 #include "hist.h" 31 #include "utils.h" 32 #include "corefile.h" 33 34 /* Return value of comparison functions used to sort tables. */ 35 #define LESSTHAN -1 36 #define EQUALTO 0 37 #define GREATERTHAN 1 38 39 static void print_header (void); 40 static void print_cycle (Sym *); 41 static int cmp_member (Sym *, Sym *); 42 static void sort_members (Sym *); 43 static void print_members (Sym *); 44 static int cmp_arc (Arc *, Arc *); 45 static void sort_parents (Sym *); 46 static void print_parents (Sym *); 47 static void sort_children (Sym *); 48 static void print_children (Sym *); 49 static void print_line (Sym *); 50 static int cmp_name (const PTR, const PTR); 51 static int cmp_arc_count (const PTR, const PTR); 52 static int cmp_fun_nuses (const PTR, const PTR); 53 static void order_and_dump_functions_by_arcs 54 (Arc **, unsigned long, int, Arc **, unsigned long *); 55 56 /* Declarations of automatically generated functions to output blurbs. */ 57 extern void bsd_callg_blurb (FILE * fp); 58 extern void fsf_callg_blurb (FILE * fp); 59 60 double print_time = 0.0; 61 62 63 static void 64 print_header (void) 65 { 66 if (first_output) 67 first_output = FALSE; 68 else 69 printf ("\f\n"); 70 71 if (!bsd_style_output) 72 { 73 if (print_descriptions) 74 printf (_("\t\t Call graph (explanation follows)\n\n")); 75 else 76 printf (_("\t\t\tCall graph\n\n")); 77 } 78 79 printf (_("\ngranularity: each sample hit covers %ld byte(s)"), 80 (long) hist_scale * (long) sizeof (UNIT)); 81 82 if (print_time > 0.0) 83 printf (_(" for %.2f%% of %.2f seconds\n\n"), 84 100.0 / print_time, print_time / hz); 85 else 86 { 87 printf (_(" no time propagated\n\n")); 88 89 /* This doesn't hurt, since all the numerators will be 0.0. */ 90 print_time = 1.0; 91 } 92 93 if (bsd_style_output) 94 { 95 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 96 "", "", "", "", _("called"), _("total"), _("parents")); 97 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n", 98 _("index"), 99 /* xgettext:no-c-format */ 100 _("%time"), 101 _("self"), _("descendants"), _("called"), _("self"), 102 _("name"), _("index")); 103 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 104 "", "", "", "", _("called"), _("total"), _("children")); 105 printf ("\n"); 106 } 107 else 108 { 109 printf (_("index %% time self children called name\n")); 110 } 111 } 112 113 /* Print a cycle header. */ 114 115 static void 116 print_cycle (Sym *cyc) 117 { 118 char buf[BUFSIZ]; 119 120 sprintf (buf, "[%d]", cyc->cg.index); 121 printf (bsd_style_output 122 ? "%-6.6s %5.1f %7.2f %11.2f %7lu" 123 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf, 124 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time, 125 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls); 126 127 if (cyc->cg.self_calls != 0) 128 printf ("+%-7lu", cyc->cg.self_calls); 129 else 130 printf (" %7.7s", ""); 131 132 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index); 133 } 134 135 /* Compare LEFT and RIGHT membmer. Major comparison key is 136 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */ 137 138 static int 139 cmp_member (Sym *left, Sym *right) 140 { 141 double left_time = left->cg.prop.self + left->cg.prop.child; 142 double right_time = right->cg.prop.self + right->cg.prop.child; 143 unsigned long left_calls = left->ncalls + left->cg.self_calls; 144 unsigned long right_calls = right->ncalls + right->cg.self_calls; 145 146 if (left_time > right_time) 147 return GREATERTHAN; 148 149 if (left_time < right_time) 150 return LESSTHAN; 151 152 if (left_calls > right_calls) 153 return GREATERTHAN; 154 155 if (left_calls < right_calls) 156 return LESSTHAN; 157 158 return EQUALTO; 159 } 160 161 /* Sort members of a cycle. */ 162 163 static void 164 sort_members (Sym *cyc) 165 { 166 Sym *todo, *doing, *prev; 167 168 /* Detach cycle members from cyclehead, 169 and insertion sort them back on. */ 170 todo = cyc->cg.cyc.next; 171 cyc->cg.cyc.next = 0; 172 173 for (doing = todo; doing != NULL; doing = todo) 174 { 175 todo = doing->cg.cyc.next; 176 177 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next) 178 { 179 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN) 180 break; 181 } 182 183 doing->cg.cyc.next = prev->cg.cyc.next; 184 prev->cg.cyc.next = doing; 185 } 186 } 187 188 /* Print the members of a cycle. */ 189 190 static void 191 print_members (Sym *cyc) 192 { 193 Sym *member; 194 195 sort_members (cyc); 196 197 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next) 198 { 199 printf (bsd_style_output 200 ? "%6.6s %5.5s %7.2f %11.2f %7lu" 201 : "%6.6s %5.5s %7.2f %7.2f %7lu", 202 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz, 203 member->ncalls); 204 205 if (member->cg.self_calls != 0) 206 printf ("+%-7lu", member->cg.self_calls); 207 else 208 printf (" %7.7s", ""); 209 210 printf (" "); 211 print_name (member); 212 printf ("\n"); 213 } 214 } 215 216 /* Compare two arcs to/from the same child/parent. 217 - if one arc is a self arc, it's least. 218 - if one arc is within a cycle, it's less than. 219 - if both arcs are within a cycle, compare arc counts. 220 - if neither arc is within a cycle, compare with 221 time + child_time as major key 222 arc count as minor key. */ 223 224 static int 225 cmp_arc (Arc *left, Arc *right) 226 { 227 Sym *left_parent = left->parent; 228 Sym *left_child = left->child; 229 Sym *right_parent = right->parent; 230 Sym *right_child = right->child; 231 double left_time, right_time; 232 233 DBG (TIMEDEBUG, 234 printf ("[cmp_arc] "); 235 print_name (left_parent); 236 printf (" calls "); 237 print_name (left_child); 238 printf (" %f + %f %lu/%lu\n", left->time, left->child_time, 239 left->count, left_child->ncalls); 240 printf ("[cmp_arc] "); 241 print_name (right_parent); 242 printf (" calls "); 243 print_name (right_child); 244 printf (" %f + %f %lu/%lu\n", right->time, right->child_time, 245 right->count, right_child->ncalls); 246 printf ("\n"); 247 ); 248 249 if (left_parent == left_child) 250 return LESSTHAN; /* Left is a self call. */ 251 252 if (right_parent == right_child) 253 return GREATERTHAN; /* Right is a self call. */ 254 255 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0 256 && left_parent->cg.cyc.num == left_child->cg.cyc.num) 257 { 258 /* Left is a call within a cycle. */ 259 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 260 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 261 { 262 /* Right is a call within the cycle, too. */ 263 if (left->count < right->count) 264 return LESSTHAN; 265 266 if (left->count > right->count) 267 return GREATERTHAN; 268 269 return EQUALTO; 270 } 271 else 272 { 273 /* Right isn't a call within the cycle. */ 274 return LESSTHAN; 275 } 276 } 277 else 278 { 279 /* Left isn't a call within a cycle. */ 280 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 281 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 282 { 283 /* Right is a call within a cycle. */ 284 return GREATERTHAN; 285 } 286 else 287 { 288 /* Neither is a call within a cycle. */ 289 left_time = left->time + left->child_time; 290 right_time = right->time + right->child_time; 291 292 if (left_time < right_time) 293 return LESSTHAN; 294 295 if (left_time > right_time) 296 return GREATERTHAN; 297 298 if (left->count < right->count) 299 return LESSTHAN; 300 301 if (left->count > right->count) 302 return GREATERTHAN; 303 304 return EQUALTO; 305 } 306 } 307 } 308 309 310 static void 311 sort_parents (Sym * child) 312 { 313 Arc *arc, *detached, sorted, *prev; 314 315 /* Unlink parents from child, then insertion sort back on to 316 sorted's parents. 317 *arc the arc you have detached and are inserting. 318 *detached the rest of the arcs to be sorted. 319 sorted arc list onto which you insertion sort. 320 *prev arc before the arc you are comparing. */ 321 sorted.next_parent = 0; 322 323 for (arc = child->cg.parents; arc; arc = detached) 324 { 325 detached = arc->next_parent; 326 327 /* Consider *arc as disconnected; insert it into sorted. */ 328 for (prev = &sorted; prev->next_parent; prev = prev->next_parent) 329 { 330 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN) 331 break; 332 } 333 334 arc->next_parent = prev->next_parent; 335 prev->next_parent = arc; 336 } 337 338 /* Reattach sorted arcs to child. */ 339 child->cg.parents = sorted.next_parent; 340 } 341 342 343 static void 344 print_parents (Sym *child) 345 { 346 Sym *parent; 347 Arc *arc; 348 Sym *cycle_head; 349 350 if (child->cg.cyc.head != 0) 351 cycle_head = child->cg.cyc.head; 352 else 353 cycle_head = child; 354 355 if (!child->cg.parents) 356 { 357 printf (bsd_style_output 358 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n") 359 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"), 360 "", "", "", "", "", ""); 361 return; 362 } 363 364 sort_parents (child); 365 366 for (arc = child->cg.parents; arc; arc = arc->next_parent) 367 { 368 parent = arc->parent; 369 if (child == parent || (child->cg.cyc.num != 0 370 && parent->cg.cyc.num == child->cg.cyc.num)) 371 { 372 /* Selfcall or call among siblings. */ 373 printf (bsd_style_output 374 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 375 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 376 "", "", "", "", 377 arc->count, ""); 378 print_name (parent); 379 printf ("\n"); 380 } 381 else 382 { 383 /* Regular parent of child. */ 384 printf (bsd_style_output 385 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 386 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 387 "", "", 388 arc->time / hz, arc->child_time / hz, 389 arc->count, cycle_head->ncalls); 390 print_name (parent); 391 printf ("\n"); 392 } 393 } 394 } 395 396 397 static void 398 sort_children (Sym *parent) 399 { 400 Arc *arc, *detached, sorted, *prev; 401 402 /* Unlink children from parent, then insertion sort back on to 403 sorted's children. 404 *arc the arc you have detached and are inserting. 405 *detached the rest of the arcs to be sorted. 406 sorted arc list onto which you insertion sort. 407 *prev arc before the arc you are comparing. */ 408 sorted.next_child = 0; 409 410 for (arc = parent->cg.children; arc; arc = detached) 411 { 412 detached = arc->next_child; 413 414 /* Consider *arc as disconnected; insert it into sorted. */ 415 for (prev = &sorted; prev->next_child; prev = prev->next_child) 416 { 417 if (cmp_arc (arc, prev->next_child) != LESSTHAN) 418 break; 419 } 420 421 arc->next_child = prev->next_child; 422 prev->next_child = arc; 423 } 424 425 /* Reattach sorted children to parent. */ 426 parent->cg.children = sorted.next_child; 427 } 428 429 430 static void 431 print_children (Sym *parent) 432 { 433 Sym *child; 434 Arc *arc; 435 436 sort_children (parent); 437 arc = parent->cg.children; 438 439 for (arc = parent->cg.children; arc; arc = arc->next_child) 440 { 441 child = arc->child; 442 if (child == parent || (child->cg.cyc.num != 0 443 && child->cg.cyc.num == parent->cg.cyc.num)) 444 { 445 /* Self call or call to sibling. */ 446 printf (bsd_style_output 447 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 448 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 449 "", "", "", "", arc->count, ""); 450 print_name (child); 451 printf ("\n"); 452 } 453 else 454 { 455 /* Regular child of parent. */ 456 printf (bsd_style_output 457 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 458 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 459 "", "", 460 arc->time / hz, arc->child_time / hz, 461 arc->count, child->cg.cyc.head->ncalls); 462 print_name (child); 463 printf ("\n"); 464 } 465 } 466 } 467 468 469 static void 470 print_line (Sym *np) 471 { 472 char buf[BUFSIZ]; 473 474 sprintf (buf, "[%d]", np->cg.index); 475 printf (bsd_style_output 476 ? "%-6.6s %5.1f %7.2f %11.2f" 477 : "%-6.6s %5.1f %7.2f %7.2f", buf, 478 100 * (np->cg.prop.self + np->cg.prop.child) / print_time, 479 np->cg.prop.self / hz, np->cg.prop.child / hz); 480 481 if ((np->ncalls + np->cg.self_calls) != 0) 482 { 483 printf (" %7lu", np->ncalls); 484 485 if (np->cg.self_calls != 0) 486 printf ("+%-7lu ", np->cg.self_calls); 487 else 488 printf (" %7.7s ", ""); 489 } 490 else 491 { 492 printf (" %7.7s %7.7s ", "", ""); 493 } 494 495 print_name (np); 496 printf ("\n"); 497 } 498 499 500 /* Print dynamic call graph. */ 501 502 void 503 cg_print (Sym ** timesortsym) 504 { 505 unsigned int sym_index; 506 Sym *parent; 507 508 if (print_descriptions && bsd_style_output) 509 bsd_callg_blurb (stdout); 510 511 print_header (); 512 513 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index) 514 { 515 parent = timesortsym[sym_index]; 516 517 if ((ignore_zeros && parent->ncalls == 0 518 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0 519 && parent->cg.prop.child == 0) 520 || !parent->cg.print_flag 521 || (line_granularity && ! parent->is_func)) 522 continue; 523 524 if (!parent->name && parent->cg.cyc.num != 0) 525 { 526 /* Cycle header. */ 527 print_cycle (parent); 528 print_members (parent); 529 } 530 else 531 { 532 print_parents (parent); 533 print_line (parent); 534 print_children (parent); 535 } 536 537 if (bsd_style_output) 538 printf ("\n"); 539 540 printf ("-----------------------------------------------\n"); 541 542 if (bsd_style_output) 543 printf ("\n"); 544 } 545 546 free (timesortsym); 547 548 if (print_descriptions && !bsd_style_output) 549 fsf_callg_blurb (stdout); 550 } 551 552 553 static int 554 cmp_name (const PTR left, const PTR right) 555 { 556 const Sym **npp1 = (const Sym **) left; 557 const Sym **npp2 = (const Sym **) right; 558 559 return strcmp ((*npp1)->name, (*npp2)->name); 560 } 561 562 563 void 564 cg_print_index (void) 565 { 566 unsigned int sym_index; 567 unsigned int nnames, todo, i, j; 568 int col, starting_col; 569 Sym **name_sorted_syms, *sym; 570 const char *filename; 571 char buf[20]; 572 int column_width = (output_width - 1) / 3; /* Don't write in last col! */ 573 574 /* Now, sort regular function name 575 alphabetically to create an index. */ 576 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *)); 577 578 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++) 579 { 580 if (ignore_zeros && symtab.base[sym_index].ncalls == 0 581 && symtab.base[sym_index].hist.time == 0) 582 continue; 583 584 name_sorted_syms[nnames++] = &symtab.base[sym_index]; 585 } 586 587 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name); 588 589 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++) 590 name_sorted_syms[todo++] = &cycle_header[sym_index]; 591 592 printf ("\f\n"); 593 printf (_("Index by function name\n\n")); 594 sym_index = (todo + 2) / 3; 595 596 for (i = 0; i < sym_index; i++) 597 { 598 col = 0; 599 starting_col = 0; 600 601 for (j = i; j < todo; j += sym_index) 602 { 603 sym = name_sorted_syms[j]; 604 605 if (sym->cg.print_flag) 606 sprintf (buf, "[%d]", sym->cg.index); 607 else 608 sprintf (buf, "(%d)", sym->cg.index); 609 610 if (j < nnames) 611 { 612 if (bsd_style_output) 613 { 614 printf ("%6.6s %-19.19s", buf, sym->name); 615 } 616 else 617 { 618 col += strlen (buf); 619 620 for (; col < starting_col + 5; ++col) 621 putchar (' '); 622 623 printf (" %s ", buf); 624 col += print_name_only (sym); 625 626 if (!line_granularity && sym->is_static && sym->file) 627 { 628 filename = sym->file->name; 629 630 if (!print_path) 631 { 632 filename = strrchr (filename, '/'); 633 634 if (filename) 635 ++filename; 636 else 637 filename = sym->file->name; 638 } 639 640 printf (" (%s)", filename); 641 col += strlen (filename) + 3; 642 } 643 } 644 } 645 else 646 { 647 if (bsd_style_output) 648 { 649 printf ("%6.6s ", buf); 650 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 651 printf ("%-19.19s", buf); 652 } 653 else 654 { 655 col += strlen (buf); 656 for (; col < starting_col + 5; ++col) 657 putchar (' '); 658 printf (" %s ", buf); 659 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 660 printf ("%s", buf); 661 col += strlen (buf); 662 } 663 } 664 665 starting_col += column_width; 666 } 667 668 printf ("\n"); 669 } 670 671 free (name_sorted_syms); 672 } 673 674 /* Compare two arcs based on their usage counts. 675 We want to sort in descending order. */ 676 677 static int 678 cmp_arc_count (const PTR left, const PTR right) 679 { 680 const Arc **npp1 = (const Arc **) left; 681 const Arc **npp2 = (const Arc **) right; 682 683 if ((*npp1)->count > (*npp2)->count) 684 return -1; 685 else if ((*npp1)->count < (*npp2)->count) 686 return 1; 687 else 688 return 0; 689 } 690 691 /* Compare two funtions based on their usage counts. 692 We want to sort in descending order. */ 693 694 static int 695 cmp_fun_nuses (const PTR left, const PTR right) 696 { 697 const Sym **npp1 = (const Sym **) left; 698 const Sym **npp2 = (const Sym **) right; 699 700 if ((*npp1)->nuses > (*npp2)->nuses) 701 return -1; 702 else if ((*npp1)->nuses < (*npp2)->nuses) 703 return 1; 704 else 705 return 0; 706 } 707 708 /* Print a suggested function ordering based on the profiling data. 709 710 We perform 4 major steps when ordering functions: 711 712 * Group unused functions together and place them at the 713 end of the function order. 714 715 * Search the highest use arcs (those which account for 90% of 716 the total arc count) for functions which have several parents. 717 718 Group those with the most call sites together (currently the 719 top 1.25% which have at least five different call sites). 720 721 These are emitted at the start of the function order. 722 723 * Use a greedy placement algorithm to place functions which 724 occur in the top 99% of the arcs in the profile. Some provisions 725 are made to handle high usage arcs where the parent and/or 726 child has already been placed. 727 728 * Run the same greedy placement algorithm on the remaining 729 arcs to place the leftover functions. 730 731 732 The various "magic numbers" should (one day) be tuneable by command 733 line options. They were arrived at by benchmarking a few applications 734 with various values to see which values produced better overall function 735 orderings. 736 737 Of course, profiling errors, machine limitations (PA long calls), and 738 poor cutoff values for the placement algorithm may limit the usefullness 739 of the resulting function order. Improvements would be greatly appreciated. 740 741 Suggestions: 742 743 * Place the functions with many callers near the middle of the 744 list to reduce long calls. 745 746 * Propagate arc usage changes as functions are placed. Ie if 747 func1 and func2 are placed together, arcs to/from those arcs 748 to the same parent/child should be combined, then resort the 749 arcs to choose the next one. 750 751 * Implement some global positioning algorithm to place the 752 chains made by the greedy local positioning algorithm. Probably 753 by examining arcs which haven't been placed yet to tie two 754 chains together. 755 756 * Take a function's size and time into account in the algorithm; 757 size in particular is important on the PA (long calls). Placing 758 many small functions onto their own page may be wise. 759 760 * Use better profiling information; many published algorithms 761 are based on call sequences through time, rather than just 762 arc counts. 763 764 * Prodecure cloning could improve performance when a small number 765 of arcs account for most of the calls to a particular function. 766 767 * Use relocation information to avoid moving unused functions 768 completely out of the code stream; this would avoid severe lossage 769 when the profile data bears little resemblance to actual runs. 770 771 * Propagation of arc usages should also improve .o link line 772 ordering which shares the same arc placement algorithm with 773 the function ordering code (in fact it is a degenerate case 774 of function ordering). */ 775 776 void 777 cg_print_function_ordering (void) 778 { 779 unsigned long sym_index; 780 unsigned long arc_index; 781 unsigned long used, unused, scratch_index; 782 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count; 783 #ifdef __GNUC__ 784 unsigned long long total_arcs, tmp_arcs_count; 785 #else 786 unsigned long total_arcs, tmp_arcs_count; 787 #endif 788 Sym **unused_syms, **used_syms, **scratch_syms; 789 Arc **unplaced_arcs, **high_arcs, **scratch_arcs; 790 791 sym_index = 0; 792 used = 0; 793 unused = 0; 794 scratch_index = 0; 795 unplaced_arc_count = 0; 796 high_arc_count = 0; 797 scratch_arc_count = 0; 798 799 /* First group all the unused functions together. */ 800 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 801 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 802 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 803 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 804 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 805 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 806 807 /* Walk through all the functions; mark those which are never 808 called as placed (we'll emit them as a group later). */ 809 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++) 810 { 811 if (symtab.base[sym_index].ncalls == 0) 812 { 813 unused_syms[unused++] = &symtab.base[sym_index]; 814 symtab.base[sym_index].has_been_placed = 1; 815 } 816 else 817 { 818 used_syms[used++] = &symtab.base[sym_index]; 819 symtab.base[sym_index].has_been_placed = 0; 820 symtab.base[sym_index].next = 0; 821 symtab.base[sym_index].prev = 0; 822 symtab.base[sym_index].nuses = 0; 823 } 824 } 825 826 /* Sort the arcs from most used to least used. */ 827 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count); 828 829 /* Compute the total arc count. Also mark arcs as unplaced. 830 831 Note we don't compensate for overflow if that happens! 832 Overflow is much less likely when this file is compiled 833 with GCC as it can double-wide integers via long long. */ 834 total_arcs = 0; 835 for (arc_index = 0; arc_index < numarcs; arc_index++) 836 { 837 total_arcs += arcs[arc_index]->count; 838 arcs[arc_index]->has_been_placed = 0; 839 } 840 841 /* We want to pull out those functions which are referenced 842 by many highly used arcs and emit them as a group. This 843 could probably use some tuning. */ 844 tmp_arcs_count = 0; 845 for (arc_index = 0; arc_index < numarcs; arc_index++) 846 { 847 tmp_arcs_count += arcs[arc_index]->count; 848 849 /* Count how many times each parent and child are used up 850 to our threshhold of arcs (90%). */ 851 if ((double)tmp_arcs_count / (double)total_arcs > 0.90) 852 break; 853 854 arcs[arc_index]->child->nuses++; 855 } 856 857 /* Now sort a temporary symbol table based on the number of 858 times each function was used in the highest used arcs. */ 859 memcpy (scratch_syms, used_syms, used * sizeof (Sym *)); 860 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses); 861 862 /* Now pick out those symbols we're going to emit as 863 a group. We take up to 1.25% of the used symbols. */ 864 for (sym_index = 0; sym_index < used / 80; sym_index++) 865 { 866 Sym *sym = scratch_syms[sym_index]; 867 Arc *arc; 868 869 /* If we hit symbols that aren't used from many call sites, 870 then we can quit. We choose five as the low limit for 871 no particular reason. */ 872 if (sym->nuses == 5) 873 break; 874 875 /* We're going to need the arcs between these functions. 876 Unfortunately, we don't know all these functions 877 until we're done. So we keep track of all the arcs 878 to the functions we care about, then prune out those 879 which are uninteresting. 880 881 An interesting variation would be to quit when we found 882 multi-call site functions which account for some percentage 883 of the arcs. */ 884 arc = sym->cg.children; 885 886 while (arc) 887 { 888 if (arc->parent != arc->child) 889 scratch_arcs[scratch_arc_count++] = arc; 890 arc->has_been_placed = 1; 891 arc = arc->next_child; 892 } 893 894 arc = sym->cg.parents; 895 896 while (arc) 897 { 898 if (arc->parent != arc->child) 899 scratch_arcs[scratch_arc_count++] = arc; 900 arc->has_been_placed = 1; 901 arc = arc->next_parent; 902 } 903 904 /* Keep track of how many symbols we're going to place. */ 905 scratch_index = sym_index; 906 907 /* A lie, but it makes identifying 908 these functions easier later. */ 909 sym->has_been_placed = 1; 910 } 911 912 /* Now walk through the temporary arcs and copy 913 those we care about into the high arcs array. */ 914 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++) 915 { 916 Arc *arc = scratch_arcs[arc_index]; 917 918 /* If this arc refers to highly used functions, then 919 then we want to keep it. */ 920 if (arc->child->has_been_placed 921 && arc->parent->has_been_placed) 922 { 923 high_arcs[high_arc_count++] = scratch_arcs[arc_index]; 924 925 /* We need to turn of has_been_placed since we're going to 926 use the main arc placement algorithm on these arcs. */ 927 arc->child->has_been_placed = 0; 928 arc->parent->has_been_placed = 0; 929 } 930 } 931 932 /* Dump the multi-site high usage functions which are not 933 going to be ordered by the main ordering algorithm. */ 934 for (sym_index = 0; sym_index < scratch_index; sym_index++) 935 { 936 if (scratch_syms[sym_index]->has_been_placed) 937 printf ("%s\n", scratch_syms[sym_index]->name); 938 } 939 940 /* Now we can order the multi-site high use 941 functions based on the arcs between them. */ 942 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count); 943 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1, 944 unplaced_arcs, &unplaced_arc_count); 945 946 /* Order and dump the high use functions left, 947 these typically have only a few call sites. */ 948 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 949 unplaced_arcs, &unplaced_arc_count); 950 951 /* Now place the rarely used functions. */ 952 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1, 953 scratch_arcs, &scratch_arc_count); 954 955 /* Output any functions not emitted by the order_and_dump calls. */ 956 for (sym_index = 0; sym_index < used; sym_index++) 957 if (used_syms[sym_index]->has_been_placed == 0) 958 printf("%s\n", used_syms[sym_index]->name); 959 960 /* Output the unused functions. */ 961 for (sym_index = 0; sym_index < unused; sym_index++) 962 printf("%s\n", unused_syms[sym_index]->name); 963 964 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 965 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 966 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 967 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 968 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 969 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 970 971 free (unused_syms); 972 free (used_syms); 973 free (scratch_syms); 974 free (high_arcs); 975 free (scratch_arcs); 976 free (unplaced_arcs); 977 } 978 979 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries; 980 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT. 981 982 If ALL is nonzero, then place all functions referenced by THE_ARCS, 983 else only place those referenced in the top 99% of the arcs in THE_ARCS. */ 984 985 #define MOST 0.99 986 static void 987 order_and_dump_functions_by_arcs (Arc **the_arcs, unsigned long arc_count, 988 int all, Arc **unplaced_arcs, 989 unsigned long *unplaced_arc_count) 990 { 991 #ifdef __GNUC__ 992 unsigned long long tmp_arcs, total_arcs; 993 #else 994 unsigned long tmp_arcs, total_arcs; 995 #endif 996 unsigned int arc_index; 997 998 /* If needed, compute the total arc count. 999 1000 Note we don't compensate for overflow if that happens! */ 1001 if (! all) 1002 { 1003 total_arcs = 0; 1004 for (arc_index = 0; arc_index < arc_count; arc_index++) 1005 total_arcs += the_arcs[arc_index]->count; 1006 } 1007 else 1008 total_arcs = 0; 1009 1010 tmp_arcs = 0; 1011 1012 for (arc_index = 0; arc_index < arc_count; arc_index++) 1013 { 1014 Sym *sym1, *sym2; 1015 Sym *child, *parent; 1016 1017 tmp_arcs += the_arcs[arc_index]->count; 1018 1019 /* Ignore this arc if it's already been placed. */ 1020 if (the_arcs[arc_index]->has_been_placed) 1021 continue; 1022 1023 child = the_arcs[arc_index]->child; 1024 parent = the_arcs[arc_index]->parent; 1025 1026 /* If we're not using all arcs, and this is a rarely used 1027 arc, then put it on the unplaced_arc list. Similarly 1028 if both the parent and child of this arc have been placed. */ 1029 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST) 1030 || child->has_been_placed || parent->has_been_placed) 1031 { 1032 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1033 continue; 1034 } 1035 1036 /* If all slots in the parent and child are full, then there isn't 1037 anything we can do right now. We'll place this arc on the 1038 unplaced arc list in the hope that a global positioning 1039 algorithm can use it to place function chains. */ 1040 if (parent->next && parent->prev && child->next && child->prev) 1041 { 1042 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1043 continue; 1044 } 1045 1046 /* If the parent is unattached, then find the closest 1047 place to attach it onto child's chain. Similarly 1048 for the opposite case. */ 1049 if (!parent->next && !parent->prev) 1050 { 1051 int next_count = 0; 1052 int prev_count = 0; 1053 Sym *prev = child; 1054 Sym *next = child; 1055 1056 /* Walk to the beginning and end of the child's chain. */ 1057 while (next->next) 1058 { 1059 next = next->next; 1060 next_count++; 1061 } 1062 1063 while (prev->prev) 1064 { 1065 prev = prev->prev; 1066 prev_count++; 1067 } 1068 1069 /* Choose the closest. */ 1070 child = next_count < prev_count ? next : prev; 1071 } 1072 else if (! child->next && !child->prev) 1073 { 1074 int next_count = 0; 1075 int prev_count = 0; 1076 Sym *prev = parent; 1077 Sym *next = parent; 1078 1079 while (next->next) 1080 { 1081 next = next->next; 1082 next_count++; 1083 } 1084 1085 while (prev->prev) 1086 { 1087 prev = prev->prev; 1088 prev_count++; 1089 } 1090 1091 parent = prev_count < next_count ? prev : next; 1092 } 1093 else 1094 { 1095 /* Couldn't find anywhere to attach the functions, 1096 put the arc on the unplaced arc list. */ 1097 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1098 continue; 1099 } 1100 1101 /* Make sure we don't tie two ends together. */ 1102 sym1 = parent; 1103 if (sym1->next) 1104 while (sym1->next) 1105 sym1 = sym1->next; 1106 else 1107 while (sym1->prev) 1108 sym1 = sym1->prev; 1109 1110 sym2 = child; 1111 if (sym2->next) 1112 while (sym2->next) 1113 sym2 = sym2->next; 1114 else 1115 while (sym2->prev) 1116 sym2 = sym2->prev; 1117 1118 if (sym1 == child 1119 && sym2 == parent) 1120 { 1121 /* This would tie two ends together. */ 1122 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1123 continue; 1124 } 1125 1126 if (parent->next) 1127 { 1128 /* Must attach to the parent's prev field. */ 1129 if (! child->next) 1130 { 1131 /* parent-prev and child-next */ 1132 parent->prev = child; 1133 child->next = parent; 1134 the_arcs[arc_index]->has_been_placed = 1; 1135 } 1136 } 1137 else if (parent->prev) 1138 { 1139 /* Must attach to the parent's next field. */ 1140 if (! child->prev) 1141 { 1142 /* parent-next and child-prev */ 1143 parent->next = child; 1144 child->prev = parent; 1145 the_arcs[arc_index]->has_been_placed = 1; 1146 } 1147 } 1148 else 1149 { 1150 /* Can attach to either field in the parent, depends 1151 on where we've got space in the child. */ 1152 if (child->prev) 1153 { 1154 /* parent-prev and child-next. */ 1155 parent->prev = child; 1156 child->next = parent; 1157 the_arcs[arc_index]->has_been_placed = 1; 1158 } 1159 else 1160 { 1161 /* parent-next and child-prev. */ 1162 parent->next = child; 1163 child->prev = parent; 1164 the_arcs[arc_index]->has_been_placed = 1; 1165 } 1166 } 1167 } 1168 1169 /* Dump the chains of functions we've made. */ 1170 for (arc_index = 0; arc_index < arc_count; arc_index++) 1171 { 1172 Sym *sym; 1173 if (the_arcs[arc_index]->parent->has_been_placed 1174 || the_arcs[arc_index]->child->has_been_placed) 1175 continue; 1176 1177 sym = the_arcs[arc_index]->parent; 1178 1179 /* If this symbol isn't attached to any other 1180 symbols, then we've got a rarely used arc. 1181 1182 Skip it for now, we'll deal with them later. */ 1183 if (sym->next == NULL 1184 && sym->prev == NULL) 1185 continue; 1186 1187 /* Get to the start of this chain. */ 1188 while (sym->prev) 1189 sym = sym->prev; 1190 1191 while (sym) 1192 { 1193 /* Mark it as placed. */ 1194 sym->has_been_placed = 1; 1195 printf ("%s\n", sym->name); 1196 sym = sym->next; 1197 } 1198 } 1199 1200 /* If we want to place all the arcs, then output 1201 those which weren't placed by the main algorithm. */ 1202 if (all) 1203 for (arc_index = 0; arc_index < arc_count; arc_index++) 1204 { 1205 Sym *sym; 1206 if (the_arcs[arc_index]->parent->has_been_placed 1207 || the_arcs[arc_index]->child->has_been_placed) 1208 continue; 1209 1210 sym = the_arcs[arc_index]->parent; 1211 1212 sym->has_been_placed = 1; 1213 printf ("%s\n", sym->name); 1214 } 1215 } 1216 1217 /* Compare two function_map structs based on file name. 1218 We want to sort in ascending order. */ 1219 1220 static int 1221 cmp_symbol_map (const void * l, const void * r) 1222 { 1223 return filename_cmp (((struct function_map *) l)->file_name, 1224 ((struct function_map *) r)->file_name); 1225 } 1226 1227 /* Print a suggested .o ordering for files on a link line based 1228 on profiling information. This uses the function placement 1229 code for the bulk of its work. */ 1230 1231 void 1232 cg_print_file_ordering (void) 1233 { 1234 unsigned long scratch_arc_count; 1235 unsigned long arc_index; 1236 unsigned long sym_index; 1237 Arc **scratch_arcs; 1238 char *last; 1239 1240 scratch_arc_count = 0; 1241 1242 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 1243 for (arc_index = 0; arc_index < numarcs; arc_index++) 1244 { 1245 if (! arcs[arc_index]->parent->mapped 1246 || ! arcs[arc_index]->child->mapped) 1247 arcs[arc_index]->has_been_placed = 1; 1248 } 1249 1250 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 1251 scratch_arcs, &scratch_arc_count); 1252 1253 /* Output .o's not handled by the main placement algorithm. */ 1254 for (sym_index = 0; sym_index < symtab.len; sym_index++) 1255 { 1256 if (symtab.base[sym_index].mapped 1257 && ! symtab.base[sym_index].has_been_placed) 1258 printf ("%s\n", symtab.base[sym_index].name); 1259 } 1260 1261 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map); 1262 1263 /* Now output any .o's that didn't have any text symbols. */ 1264 last = NULL; 1265 for (sym_index = 0; sym_index < symbol_map_count; sym_index++) 1266 { 1267 unsigned int index2; 1268 1269 /* Don't bother searching if this symbol 1270 is the same as the previous one. */ 1271 if (last && !filename_cmp (last, symbol_map[sym_index].file_name)) 1272 continue; 1273 1274 for (index2 = 0; index2 < symtab.len; index2++) 1275 { 1276 if (! symtab.base[index2].mapped) 1277 continue; 1278 1279 if (!filename_cmp (symtab.base[index2].name, 1280 symbol_map[sym_index].file_name)) 1281 break; 1282 } 1283 1284 /* If we didn't find it in the symbol table, then it must 1285 be a .o with no text symbols. Output it last. */ 1286 if (index2 == symtab.len) 1287 printf ("%s\n", symbol_map[sym_index].file_name); 1288 last = symbol_map[sym_index].file_name; 1289 } 1290 } 1291