1 /* cg_print.c - Print routines for displaying call graphs. 2 3 Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. 4 5 This file is part of GNU Binutils. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 20 02110-1301, USA. */ 21 22 #include "libiberty.h" 23 #include "gprof.h" 24 #include "search_list.h" 25 #include "source.h" 26 #include "symtab.h" 27 #include "cg_arcs.h" 28 #include "cg_print.h" 29 #include "hist.h" 30 #include "utils.h" 31 #include "corefile.h" 32 33 /* Return value of comparison functions used to sort tables. */ 34 #define LESSTHAN -1 35 #define EQUALTO 0 36 #define GREATERTHAN 1 37 38 static void print_header (void); 39 static void print_cycle (Sym *); 40 static int cmp_member (Sym *, Sym *); 41 static void sort_members (Sym *); 42 static void print_members (Sym *); 43 static int cmp_arc (Arc *, Arc *); 44 static void sort_parents (Sym *); 45 static void print_parents (Sym *); 46 static void sort_children (Sym *); 47 static void print_children (Sym *); 48 static void print_line (Sym *); 49 static int cmp_name (const PTR, const PTR); 50 static int cmp_arc_count (const PTR, const PTR); 51 static int cmp_fun_nuses (const PTR, const PTR); 52 static void order_and_dump_functions_by_arcs 53 (Arc **, unsigned long, int, Arc **, unsigned long *); 54 55 /* Declarations of automatically generated functions to output blurbs. */ 56 extern void bsd_callg_blurb (FILE * fp); 57 extern void fsf_callg_blurb (FILE * fp); 58 59 double print_time = 0.0; 60 61 62 static void 63 print_header () 64 { 65 if (first_output) 66 first_output = FALSE; 67 else 68 printf ("\f\n"); 69 70 if (!bsd_style_output) 71 { 72 if (print_descriptions) 73 printf (_("\t\t Call graph (explanation follows)\n\n")); 74 else 75 printf (_("\t\t\tCall graph\n\n")); 76 } 77 78 printf (_("\ngranularity: each sample hit covers %ld byte(s)"), 79 (long) hist_scale * sizeof (UNIT)); 80 81 if (print_time > 0.0) 82 printf (_(" for %.2f%% of %.2f seconds\n\n"), 83 100.0 / print_time, print_time / hz); 84 else 85 { 86 printf (_(" no time propagated\n\n")); 87 88 /* This doesn't hurt, since all the numerators will be 0.0. */ 89 print_time = 1.0; 90 } 91 92 if (bsd_style_output) 93 { 94 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 95 "", "", "", "", _("called"), _("total"), _("parents")); 96 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n", 97 _("index"), _("%time"), _("self"), _("descendants"), 98 _("called"), _("self"), _("name"), _("index")); 99 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 100 "", "", "", "", _("called"), _("total"), _("children")); 101 printf ("\n"); 102 } 103 else 104 { 105 printf (_("index %% time self children called name\n")); 106 } 107 } 108 109 /* Print a cycle header. */ 110 111 static void 112 print_cycle (Sym *cyc) 113 { 114 char buf[BUFSIZ]; 115 116 sprintf (buf, "[%d]", cyc->cg.index); 117 printf (bsd_style_output 118 ? "%-6.6s %5.1f %7.2f %11.2f %7lu" 119 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf, 120 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time, 121 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls); 122 123 if (cyc->cg.self_calls != 0) 124 printf ("+%-7lu", cyc->cg.self_calls); 125 else 126 printf (" %7.7s", ""); 127 128 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index); 129 } 130 131 /* Compare LEFT and RIGHT membmer. Major comparison key is 132 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */ 133 134 static int 135 cmp_member (Sym *left, Sym *right) 136 { 137 double left_time = left->cg.prop.self + left->cg.prop.child; 138 double right_time = right->cg.prop.self + right->cg.prop.child; 139 unsigned long left_calls = left->ncalls + left->cg.self_calls; 140 unsigned long right_calls = right->ncalls + right->cg.self_calls; 141 142 if (left_time > right_time) 143 return GREATERTHAN; 144 145 if (left_time < right_time) 146 return LESSTHAN; 147 148 if (left_calls > right_calls) 149 return GREATERTHAN; 150 151 if (left_calls < right_calls) 152 return LESSTHAN; 153 154 return EQUALTO; 155 } 156 157 /* Sort members of a cycle. */ 158 159 static void 160 sort_members (Sym *cyc) 161 { 162 Sym *todo, *doing, *prev; 163 164 /* Detach cycle members from cyclehead, 165 and insertion sort them back on. */ 166 todo = cyc->cg.cyc.next; 167 cyc->cg.cyc.next = 0; 168 169 for (doing = todo; doing && doing->cg.cyc.next; doing = todo) 170 { 171 todo = doing->cg.cyc.next; 172 173 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next) 174 { 175 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN) 176 break; 177 } 178 179 doing->cg.cyc.next = prev->cg.cyc.next; 180 prev->cg.cyc.next = doing; 181 } 182 } 183 184 /* Print the members of a cycle. */ 185 186 static void 187 print_members (Sym *cyc) 188 { 189 Sym *member; 190 191 sort_members (cyc); 192 193 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next) 194 { 195 printf (bsd_style_output 196 ? "%6.6s %5.5s %7.2f %11.2f %7lu" 197 : "%6.6s %5.5s %7.2f %7.2f %7lu", 198 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz, 199 member->ncalls); 200 201 if (member->cg.self_calls != 0) 202 printf ("+%-7lu", member->cg.self_calls); 203 else 204 printf (" %7.7s", ""); 205 206 printf (" "); 207 print_name (member); 208 printf ("\n"); 209 } 210 } 211 212 /* Compare two arcs to/from the same child/parent. 213 - if one arc is a self arc, it's least. 214 - if one arc is within a cycle, it's less than. 215 - if both arcs are within a cycle, compare arc counts. 216 - if neither arc is within a cycle, compare with 217 time + child_time as major key 218 arc count as minor key. */ 219 220 static int 221 cmp_arc (Arc *left, Arc *right) 222 { 223 Sym *left_parent = left->parent; 224 Sym *left_child = left->child; 225 Sym *right_parent = right->parent; 226 Sym *right_child = right->child; 227 double left_time, right_time; 228 229 DBG (TIMEDEBUG, 230 printf ("[cmp_arc] "); 231 print_name (left_parent); 232 printf (" calls "); 233 print_name (left_child); 234 printf (" %f + %f %lu/%lu\n", left->time, left->child_time, 235 left->count, left_child->ncalls); 236 printf ("[cmp_arc] "); 237 print_name (right_parent); 238 printf (" calls "); 239 print_name (right_child); 240 printf (" %f + %f %lu/%lu\n", right->time, right->child_time, 241 right->count, right_child->ncalls); 242 printf ("\n"); 243 ); 244 245 if (left_parent == left_child) 246 return LESSTHAN; /* Left is a self call. */ 247 248 if (right_parent == right_child) 249 return GREATERTHAN; /* Right is a self call. */ 250 251 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0 252 && left_parent->cg.cyc.num == left_child->cg.cyc.num) 253 { 254 /* Left is a call within a cycle. */ 255 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 256 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 257 { 258 /* Right is a call within the cycle, too. */ 259 if (left->count < right->count) 260 return LESSTHAN; 261 262 if (left->count > right->count) 263 return GREATERTHAN; 264 265 return EQUALTO; 266 } 267 else 268 { 269 /* Right isn't a call within the cycle. */ 270 return LESSTHAN; 271 } 272 } 273 else 274 { 275 /* Left isn't a call within a cycle. */ 276 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 277 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 278 { 279 /* Right is a call within a cycle. */ 280 return GREATERTHAN; 281 } 282 else 283 { 284 /* Neither is a call within a cycle. */ 285 left_time = left->time + left->child_time; 286 right_time = right->time + right->child_time; 287 288 if (left_time < right_time) 289 return LESSTHAN; 290 291 if (left_time > right_time) 292 return GREATERTHAN; 293 294 if (left->count < right->count) 295 return LESSTHAN; 296 297 if (left->count > right->count) 298 return GREATERTHAN; 299 300 return EQUALTO; 301 } 302 } 303 } 304 305 306 static void 307 sort_parents (Sym * child) 308 { 309 Arc *arc, *detached, sorted, *prev; 310 311 /* Unlink parents from child, then insertion sort back on to 312 sorted's parents. 313 *arc the arc you have detached and are inserting. 314 *detached the rest of the arcs to be sorted. 315 sorted arc list onto which you insertion sort. 316 *prev arc before the arc you are comparing. */ 317 sorted.next_parent = 0; 318 319 for (arc = child->cg.parents; arc; arc = detached) 320 { 321 detached = arc->next_parent; 322 323 /* Consider *arc as disconnected; insert it into sorted. */ 324 for (prev = &sorted; prev->next_parent; prev = prev->next_parent) 325 { 326 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN) 327 break; 328 } 329 330 arc->next_parent = prev->next_parent; 331 prev->next_parent = arc; 332 } 333 334 /* Reattach sorted arcs to child. */ 335 child->cg.parents = sorted.next_parent; 336 } 337 338 339 static void 340 print_parents (Sym *child) 341 { 342 Sym *parent; 343 Arc *arc; 344 Sym *cycle_head; 345 346 if (child->cg.cyc.head != 0) 347 cycle_head = child->cg.cyc.head; 348 else 349 cycle_head = child; 350 351 if (!child->cg.parents) 352 { 353 printf (bsd_style_output 354 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n") 355 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"), 356 "", "", "", "", "", ""); 357 return; 358 } 359 360 sort_parents (child); 361 362 for (arc = child->cg.parents; arc; arc = arc->next_parent) 363 { 364 parent = arc->parent; 365 if (child == parent || (child->cg.cyc.num != 0 366 && parent->cg.cyc.num == child->cg.cyc.num)) 367 { 368 /* Selfcall or call among siblings. */ 369 printf (bsd_style_output 370 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 371 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 372 "", "", "", "", 373 arc->count, ""); 374 print_name (parent); 375 printf ("\n"); 376 } 377 else 378 { 379 /* Regular parent of child. */ 380 printf (bsd_style_output 381 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 382 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 383 "", "", 384 arc->time / hz, arc->child_time / hz, 385 arc->count, cycle_head->ncalls); 386 print_name (parent); 387 printf ("\n"); 388 } 389 } 390 } 391 392 393 static void 394 sort_children (Sym *parent) 395 { 396 Arc *arc, *detached, sorted, *prev; 397 398 /* Unlink children from parent, then insertion sort back on to 399 sorted's children. 400 *arc the arc you have detached and are inserting. 401 *detached the rest of the arcs to be sorted. 402 sorted arc list onto which you insertion sort. 403 *prev arc before the arc you are comparing. */ 404 sorted.next_child = 0; 405 406 for (arc = parent->cg.children; arc; arc = detached) 407 { 408 detached = arc->next_child; 409 410 /* Consider *arc as disconnected; insert it into sorted. */ 411 for (prev = &sorted; prev->next_child; prev = prev->next_child) 412 { 413 if (cmp_arc (arc, prev->next_child) != LESSTHAN) 414 break; 415 } 416 417 arc->next_child = prev->next_child; 418 prev->next_child = arc; 419 } 420 421 /* Reattach sorted children to parent. */ 422 parent->cg.children = sorted.next_child; 423 } 424 425 426 static void 427 print_children (Sym *parent) 428 { 429 Sym *child; 430 Arc *arc; 431 432 sort_children (parent); 433 arc = parent->cg.children; 434 435 for (arc = parent->cg.children; arc; arc = arc->next_child) 436 { 437 child = arc->child; 438 if (child == parent || (child->cg.cyc.num != 0 439 && child->cg.cyc.num == parent->cg.cyc.num)) 440 { 441 /* Self call or call to sibling. */ 442 printf (bsd_style_output 443 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 444 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 445 "", "", "", "", arc->count, ""); 446 print_name (child); 447 printf ("\n"); 448 } 449 else 450 { 451 /* Regular child of parent. */ 452 printf (bsd_style_output 453 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 454 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 455 "", "", 456 arc->time / hz, arc->child_time / hz, 457 arc->count, child->cg.cyc.head->ncalls); 458 print_name (child); 459 printf ("\n"); 460 } 461 } 462 } 463 464 465 static void 466 print_line (Sym *np) 467 { 468 char buf[BUFSIZ]; 469 470 sprintf (buf, "[%d]", np->cg.index); 471 printf (bsd_style_output 472 ? "%-6.6s %5.1f %7.2f %11.2f" 473 : "%-6.6s %5.1f %7.2f %7.2f", buf, 474 100 * (np->cg.prop.self + np->cg.prop.child) / print_time, 475 np->cg.prop.self / hz, np->cg.prop.child / hz); 476 477 if ((np->ncalls + np->cg.self_calls) != 0) 478 { 479 printf (" %7lu", np->ncalls); 480 481 if (np->cg.self_calls != 0) 482 printf ("+%-7lu ", np->cg.self_calls); 483 else 484 printf (" %7.7s ", ""); 485 } 486 else 487 { 488 printf (" %7.7s %7.7s ", "", ""); 489 } 490 491 print_name (np); 492 printf ("\n"); 493 } 494 495 496 /* Print dynamic call graph. */ 497 498 void 499 cg_print (Sym ** timesortsym) 500 { 501 unsigned int index; 502 Sym *parent; 503 504 if (print_descriptions && bsd_style_output) 505 bsd_callg_blurb (stdout); 506 507 print_header (); 508 509 for (index = 0; index < symtab.len + num_cycles; ++index) 510 { 511 parent = timesortsym[index]; 512 513 if ((ignore_zeros && parent->ncalls == 0 514 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0 515 && parent->cg.prop.child == 0) 516 || !parent->cg.print_flag 517 || (line_granularity && ! parent->is_func)) 518 continue; 519 520 if (!parent->name && parent->cg.cyc.num != 0) 521 { 522 /* Cycle header. */ 523 print_cycle (parent); 524 print_members (parent); 525 } 526 else 527 { 528 print_parents (parent); 529 print_line (parent); 530 print_children (parent); 531 } 532 533 if (bsd_style_output) 534 printf ("\n"); 535 536 printf ("-----------------------------------------------\n"); 537 538 if (bsd_style_output) 539 printf ("\n"); 540 } 541 542 free (timesortsym); 543 544 if (print_descriptions && !bsd_style_output) 545 fsf_callg_blurb (stdout); 546 } 547 548 549 static int 550 cmp_name (const PTR left, const PTR right) 551 { 552 const Sym **npp1 = (const Sym **) left; 553 const Sym **npp2 = (const Sym **) right; 554 555 return strcmp ((*npp1)->name, (*npp2)->name); 556 } 557 558 559 void 560 cg_print_index () 561 { 562 unsigned int index; 563 unsigned int nnames, todo, i, j; 564 int col, starting_col; 565 Sym **name_sorted_syms, *sym; 566 const char *filename; 567 char buf[20]; 568 int column_width = (output_width - 1) / 3; /* Don't write in last col! */ 569 570 /* Now, sort regular function name 571 alphabetically to create an index. */ 572 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *)); 573 574 for (index = 0, nnames = 0; index < symtab.len; index++) 575 { 576 if (ignore_zeros && symtab.base[index].ncalls == 0 577 && symtab.base[index].hist.time == 0) 578 continue; 579 580 name_sorted_syms[nnames++] = &symtab.base[index]; 581 } 582 583 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name); 584 585 for (index = 1, todo = nnames; index <= num_cycles; index++) 586 name_sorted_syms[todo++] = &cycle_header[index]; 587 588 printf ("\f\n"); 589 printf (_("Index by function name\n\n")); 590 index = (todo + 2) / 3; 591 592 for (i = 0; i < index; i++) 593 { 594 col = 0; 595 starting_col = 0; 596 597 for (j = i; j < todo; j += index) 598 { 599 sym = name_sorted_syms[j]; 600 601 if (sym->cg.print_flag) 602 sprintf (buf, "[%d]", sym->cg.index); 603 else 604 sprintf (buf, "(%d)", sym->cg.index); 605 606 if (j < nnames) 607 { 608 if (bsd_style_output) 609 { 610 printf ("%6.6s %-19.19s", buf, sym->name); 611 } 612 else 613 { 614 col += strlen (buf); 615 616 for (; col < starting_col + 5; ++col) 617 putchar (' '); 618 619 printf (" %s ", buf); 620 col += print_name_only (sym); 621 622 if (!line_granularity && sym->is_static && sym->file) 623 { 624 filename = sym->file->name; 625 626 if (!print_path) 627 { 628 filename = strrchr (filename, '/'); 629 630 if (filename) 631 ++filename; 632 else 633 filename = sym->file->name; 634 } 635 636 printf (" (%s)", filename); 637 col += strlen (filename) + 3; 638 } 639 } 640 } 641 else 642 { 643 if (bsd_style_output) 644 { 645 printf ("%6.6s ", buf); 646 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 647 printf ("%-19.19s", buf); 648 } 649 else 650 { 651 col += strlen (buf); 652 for (; col < starting_col + 5; ++col) 653 putchar (' '); 654 printf (" %s ", buf); 655 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 656 printf ("%s", buf); 657 col += strlen (buf); 658 } 659 } 660 661 starting_col += column_width; 662 } 663 664 printf ("\n"); 665 } 666 667 free (name_sorted_syms); 668 } 669 670 /* Compare two arcs based on their usage counts. 671 We want to sort in descending order. */ 672 673 static int 674 cmp_arc_count (const PTR left, const PTR right) 675 { 676 const Arc **npp1 = (const Arc **) left; 677 const Arc **npp2 = (const Arc **) right; 678 679 if ((*npp1)->count > (*npp2)->count) 680 return -1; 681 else if ((*npp1)->count < (*npp2)->count) 682 return 1; 683 else 684 return 0; 685 } 686 687 /* Compare two funtions based on their usage counts. 688 We want to sort in descending order. */ 689 690 static int 691 cmp_fun_nuses (const PTR left, const PTR right) 692 { 693 const Sym **npp1 = (const Sym **) left; 694 const Sym **npp2 = (const Sym **) right; 695 696 if ((*npp1)->nuses > (*npp2)->nuses) 697 return -1; 698 else if ((*npp1)->nuses < (*npp2)->nuses) 699 return 1; 700 else 701 return 0; 702 } 703 704 /* Print a suggested function ordering based on the profiling data. 705 706 We perform 4 major steps when ordering functions: 707 708 * Group unused functions together and place them at the 709 end of the function order. 710 711 * Search the highest use arcs (those which account for 90% of 712 the total arc count) for functions which have several parents. 713 714 Group those with the most call sites together (currently the 715 top 1.25% which have at least five different call sites). 716 717 These are emitted at the start of the function order. 718 719 * Use a greedy placement algorithm to place functions which 720 occur in the top 99% of the arcs in the profile. Some provisions 721 are made to handle high usage arcs where the parent and/or 722 child has already been placed. 723 724 * Run the same greedy placement algorithm on the remaining 725 arcs to place the leftover functions. 726 727 728 The various "magic numbers" should (one day) be tuneable by command 729 line options. They were arrived at by benchmarking a few applications 730 with various values to see which values produced better overall function 731 orderings. 732 733 Of course, profiling errors, machine limitations (PA long calls), and 734 poor cutoff values for the placement algorithm may limit the usefullness 735 of the resulting function order. Improvements would be greatly appreciated. 736 737 Suggestions: 738 739 * Place the functions with many callers near the middle of the 740 list to reduce long calls. 741 742 * Propagate arc usage changes as functions are placed. Ie if 743 func1 and func2 are placed together, arcs to/from those arcs 744 to the same parent/child should be combined, then resort the 745 arcs to choose the next one. 746 747 * Implement some global positioning algorithm to place the 748 chains made by the greedy local positioning algorithm. Probably 749 by examining arcs which haven't been placed yet to tie two 750 chains together. 751 752 * Take a function's size and time into account in the algorithm; 753 size in particular is important on the PA (long calls). Placing 754 many small functions onto their own page may be wise. 755 756 * Use better profiling information; many published algorithms 757 are based on call sequences through time, rather than just 758 arc counts. 759 760 * Prodecure cloning could improve performance when a small number 761 of arcs account for most of the calls to a particular function. 762 763 * Use relocation information to avoid moving unused functions 764 completely out of the code stream; this would avoid severe lossage 765 when the profile data bears little resemblance to actual runs. 766 767 * Propagation of arc usages should also improve .o link line 768 ordering which shares the same arc placement algorithm with 769 the function ordering code (in fact it is a degenerate case 770 of function ordering). */ 771 772 void 773 cg_print_function_ordering () 774 { 775 unsigned long index, used, unused, scratch_index; 776 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count; 777 #ifdef __GNUC__ 778 unsigned long long total_arcs, tmp_arcs_count; 779 #else 780 unsigned long total_arcs, tmp_arcs_count; 781 #endif 782 Sym **unused_syms, **used_syms, **scratch_syms; 783 Arc **unplaced_arcs, **high_arcs, **scratch_arcs; 784 785 index = 0; 786 used = 0; 787 unused = 0; 788 scratch_index = 0; 789 unplaced_arc_count = 0; 790 high_arc_count = 0; 791 scratch_arc_count = 0; 792 793 /* First group all the unused functions together. */ 794 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 795 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 796 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 797 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 798 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 799 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 800 801 /* Walk through all the functions; mark those which are never 802 called as placed (we'll emit them as a group later). */ 803 for (index = 0, used = 0, unused = 0; index < symtab.len; index++) 804 { 805 if (symtab.base[index].ncalls == 0) 806 { 807 /* Filter out gprof generated names. */ 808 if (strcmp (symtab.base[index].name, "<locore>") 809 && strcmp (symtab.base[index].name, "<hicore>")) 810 { 811 unused_syms[unused++] = &symtab.base[index]; 812 symtab.base[index].has_been_placed = 1; 813 } 814 } 815 else 816 { 817 used_syms[used++] = &symtab.base[index]; 818 symtab.base[index].has_been_placed = 0; 819 symtab.base[index].next = 0; 820 symtab.base[index].prev = 0; 821 symtab.base[index].nuses = 0; 822 } 823 } 824 825 /* Sort the arcs from most used to least used. */ 826 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count); 827 828 /* Compute the total arc count. Also mark arcs as unplaced. 829 830 Note we don't compensate for overflow if that happens! 831 Overflow is much less likely when this file is compiled 832 with GCC as it can double-wide integers via long long. */ 833 total_arcs = 0; 834 for (index = 0; index < numarcs; index++) 835 { 836 total_arcs += arcs[index]->count; 837 arcs[index]->has_been_placed = 0; 838 } 839 840 /* We want to pull out those functions which are referenced 841 by many highly used arcs and emit them as a group. This 842 could probably use some tuning. */ 843 tmp_arcs_count = 0; 844 for (index = 0; index < numarcs; index++) 845 { 846 tmp_arcs_count += arcs[index]->count; 847 848 /* Count how many times each parent and child are used up 849 to our threshhold of arcs (90%). */ 850 if ((double)tmp_arcs_count / (double)total_arcs > 0.90) 851 break; 852 853 arcs[index]->child->nuses++; 854 } 855 856 /* Now sort a temporary symbol table based on the number of 857 times each function was used in the highest used arcs. */ 858 memcpy (scratch_syms, used_syms, used * sizeof (Sym *)); 859 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses); 860 861 /* Now pick out those symbols we're going to emit as 862 a group. We take up to 1.25% of the used symbols. */ 863 for (index = 0; index < used / 80; index++) 864 { 865 Sym *sym = scratch_syms[index]; 866 Arc *arc; 867 868 /* If we hit symbols that aren't used from many call sites, 869 then we can quit. We choose five as the low limit for 870 no particular reason. */ 871 if (sym->nuses == 5) 872 break; 873 874 /* We're going to need the arcs between these functions. 875 Unfortunately, we don't know all these functions 876 until we're done. So we keep track of all the arcs 877 to the functions we care about, then prune out those 878 which are uninteresting. 879 880 An interesting variation would be to quit when we found 881 multi-call site functions which account for some percentage 882 of the arcs. */ 883 arc = sym->cg.children; 884 885 while (arc) 886 { 887 if (arc->parent != arc->child) 888 scratch_arcs[scratch_arc_count++] = arc; 889 arc->has_been_placed = 1; 890 arc = arc->next_child; 891 } 892 893 arc = sym->cg.parents; 894 895 while (arc) 896 { 897 if (arc->parent != arc->child) 898 scratch_arcs[scratch_arc_count++] = arc; 899 arc->has_been_placed = 1; 900 arc = arc->next_parent; 901 } 902 903 /* Keep track of how many symbols we're going to place. */ 904 scratch_index = index; 905 906 /* A lie, but it makes identifying 907 these functions easier later. */ 908 sym->has_been_placed = 1; 909 } 910 911 /* Now walk through the temporary arcs and copy 912 those we care about into the high arcs array. */ 913 for (index = 0; index < scratch_arc_count; index++) 914 { 915 Arc *arc = scratch_arcs[index]; 916 917 /* If this arc refers to highly used functions, then 918 then we want to keep it. */ 919 if (arc->child->has_been_placed 920 && arc->parent->has_been_placed) 921 { 922 high_arcs[high_arc_count++] = scratch_arcs[index]; 923 924 /* We need to turn of has_been_placed since we're going to 925 use the main arc placement algorithm on these arcs. */ 926 arc->child->has_been_placed = 0; 927 arc->parent->has_been_placed = 0; 928 } 929 } 930 931 /* Dump the multi-site high usage functions which are not 932 going to be ordered by the main ordering algorithm. */ 933 for (index = 0; index < scratch_index; index++) 934 { 935 if (scratch_syms[index]->has_been_placed) 936 printf ("%s\n", scratch_syms[index]->name); 937 } 938 939 /* Now we can order the multi-site high use 940 functions based on the arcs between them. */ 941 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count); 942 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1, 943 unplaced_arcs, &unplaced_arc_count); 944 945 /* Order and dump the high use functions left, 946 these typically have only a few call sites. */ 947 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 948 unplaced_arcs, &unplaced_arc_count); 949 950 /* Now place the rarely used functions. */ 951 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1, 952 scratch_arcs, &scratch_arc_count); 953 954 /* Output any functions not emitted by the order_and_dump calls. */ 955 for (index = 0; index < used; index++) 956 if (used_syms[index]->has_been_placed == 0) 957 printf("%s\n", used_syms[index]->name); 958 959 /* Output the unused functions. */ 960 for (index = 0; index < unused; index++) 961 printf("%s\n", unused_syms[index]->name); 962 963 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 964 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 965 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 966 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 967 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 968 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 969 970 free (unused_syms); 971 free (used_syms); 972 free (scratch_syms); 973 free (high_arcs); 974 free (scratch_arcs); 975 free (unplaced_arcs); 976 } 977 978 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries; 979 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT. 980 981 If ALL is nonzero, then place all functions referenced by THE_ARCS, 982 else only place those referenced in the top 99% of the arcs in THE_ARCS. */ 983 984 #define MOST 0.99 985 static void 986 order_and_dump_functions_by_arcs (the_arcs, arc_count, all, 987 unplaced_arcs, unplaced_arc_count) 988 Arc **the_arcs; 989 unsigned long arc_count; 990 int all; 991 Arc **unplaced_arcs; 992 unsigned long *unplaced_arc_count; 993 { 994 #ifdef __GNUC__ 995 unsigned long long tmp_arcs, total_arcs; 996 #else 997 unsigned long tmp_arcs, total_arcs; 998 #endif 999 unsigned int index; 1000 1001 /* If needed, compute the total arc count. 1002 1003 Note we don't compensate for overflow if that happens! */ 1004 if (! all) 1005 { 1006 total_arcs = 0; 1007 for (index = 0; index < arc_count; index++) 1008 total_arcs += the_arcs[index]->count; 1009 } 1010 else 1011 total_arcs = 0; 1012 1013 tmp_arcs = 0; 1014 1015 for (index = 0; index < arc_count; index++) 1016 { 1017 Sym *sym1, *sym2; 1018 Sym *child, *parent; 1019 1020 tmp_arcs += the_arcs[index]->count; 1021 1022 /* Ignore this arc if it's already been placed. */ 1023 if (the_arcs[index]->has_been_placed) 1024 continue; 1025 1026 child = the_arcs[index]->child; 1027 parent = the_arcs[index]->parent; 1028 1029 /* If we're not using all arcs, and this is a rarely used 1030 arc, then put it on the unplaced_arc list. Similarly 1031 if both the parent and child of this arc have been placed. */ 1032 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST) 1033 || child->has_been_placed || parent->has_been_placed) 1034 { 1035 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1036 continue; 1037 } 1038 1039 /* If all slots in the parent and child are full, then there isn't 1040 anything we can do right now. We'll place this arc on the 1041 unplaced arc list in the hope that a global positioning 1042 algorithm can use it to place function chains. */ 1043 if (parent->next && parent->prev && child->next && child->prev) 1044 { 1045 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1046 continue; 1047 } 1048 1049 /* If the parent is unattached, then find the closest 1050 place to attach it onto child's chain. Similarly 1051 for the opposite case. */ 1052 if (!parent->next && !parent->prev) 1053 { 1054 int next_count = 0; 1055 int prev_count = 0; 1056 Sym *prev = child; 1057 Sym *next = child; 1058 1059 /* Walk to the beginning and end of the child's chain. */ 1060 while (next->next) 1061 { 1062 next = next->next; 1063 next_count++; 1064 } 1065 1066 while (prev->prev) 1067 { 1068 prev = prev->prev; 1069 prev_count++; 1070 } 1071 1072 /* Choose the closest. */ 1073 child = next_count < prev_count ? next : prev; 1074 } 1075 else if (! child->next && !child->prev) 1076 { 1077 int next_count = 0; 1078 int prev_count = 0; 1079 Sym *prev = parent; 1080 Sym *next = parent; 1081 1082 while (next->next) 1083 { 1084 next = next->next; 1085 next_count++; 1086 } 1087 1088 while (prev->prev) 1089 { 1090 prev = prev->prev; 1091 prev_count++; 1092 } 1093 1094 parent = prev_count < next_count ? prev : next; 1095 } 1096 else 1097 { 1098 /* Couldn't find anywhere to attach the functions, 1099 put the arc on the unplaced arc list. */ 1100 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1101 continue; 1102 } 1103 1104 /* Make sure we don't tie two ends together. */ 1105 sym1 = parent; 1106 if (sym1->next) 1107 while (sym1->next) 1108 sym1 = sym1->next; 1109 else 1110 while (sym1->prev) 1111 sym1 = sym1->prev; 1112 1113 sym2 = child; 1114 if (sym2->next) 1115 while (sym2->next) 1116 sym2 = sym2->next; 1117 else 1118 while (sym2->prev) 1119 sym2 = sym2->prev; 1120 1121 if (sym1 == child 1122 && sym2 == parent) 1123 { 1124 /* This would tie two ends together. */ 1125 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1126 continue; 1127 } 1128 1129 if (parent->next) 1130 { 1131 /* Must attach to the parent's prev field. */ 1132 if (! child->next) 1133 { 1134 /* parent-prev and child-next */ 1135 parent->prev = child; 1136 child->next = parent; 1137 the_arcs[index]->has_been_placed = 1; 1138 } 1139 } 1140 else if (parent->prev) 1141 { 1142 /* Must attach to the parent's next field. */ 1143 if (! child->prev) 1144 { 1145 /* parent-next and child-prev */ 1146 parent->next = child; 1147 child->prev = parent; 1148 the_arcs[index]->has_been_placed = 1; 1149 } 1150 } 1151 else 1152 { 1153 /* Can attach to either field in the parent, depends 1154 on where we've got space in the child. */ 1155 if (child->prev) 1156 { 1157 /* parent-prev and child-next. */ 1158 parent->prev = child; 1159 child->next = parent; 1160 the_arcs[index]->has_been_placed = 1; 1161 } 1162 else 1163 { 1164 /* parent-next and child-prev. */ 1165 parent->next = child; 1166 child->prev = parent; 1167 the_arcs[index]->has_been_placed = 1; 1168 } 1169 } 1170 } 1171 1172 /* Dump the chains of functions we've made. */ 1173 for (index = 0; index < arc_count; index++) 1174 { 1175 Sym *sym; 1176 if (the_arcs[index]->parent->has_been_placed 1177 || the_arcs[index]->child->has_been_placed) 1178 continue; 1179 1180 sym = the_arcs[index]->parent; 1181 1182 /* If this symbol isn't attached to any other 1183 symbols, then we've got a rarely used arc. 1184 1185 Skip it for now, we'll deal with them later. */ 1186 if (sym->next == NULL 1187 && sym->prev == NULL) 1188 continue; 1189 1190 /* Get to the start of this chain. */ 1191 while (sym->prev) 1192 sym = sym->prev; 1193 1194 while (sym) 1195 { 1196 /* Mark it as placed. */ 1197 sym->has_been_placed = 1; 1198 printf ("%s\n", sym->name); 1199 sym = sym->next; 1200 } 1201 } 1202 1203 /* If we want to place all the arcs, then output 1204 those which weren't placed by the main algorithm. */ 1205 if (all) 1206 for (index = 0; index < arc_count; index++) 1207 { 1208 Sym *sym; 1209 if (the_arcs[index]->parent->has_been_placed 1210 || the_arcs[index]->child->has_been_placed) 1211 continue; 1212 1213 sym = the_arcs[index]->parent; 1214 1215 sym->has_been_placed = 1; 1216 printf ("%s\n", sym->name); 1217 } 1218 } 1219 1220 /* Print a suggested .o ordering for files on a link line based 1221 on profiling information. This uses the function placement 1222 code for the bulk of its work. */ 1223 1224 void 1225 cg_print_file_ordering () 1226 { 1227 unsigned long scratch_arc_count, index; 1228 Arc **scratch_arcs; 1229 char *last; 1230 1231 scratch_arc_count = 0; 1232 1233 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 1234 for (index = 0; index < numarcs; index++) 1235 { 1236 if (! arcs[index]->parent->mapped 1237 || ! arcs[index]->child->mapped) 1238 arcs[index]->has_been_placed = 1; 1239 } 1240 1241 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 1242 scratch_arcs, &scratch_arc_count); 1243 1244 /* Output .o's not handled by the main placement algorithm. */ 1245 for (index = 0; index < symtab.len; index++) 1246 { 1247 if (symtab.base[index].mapped 1248 && ! symtab.base[index].has_been_placed) 1249 printf ("%s\n", symtab.base[index].name); 1250 } 1251 1252 /* Now output any .o's that didn't have any text symbols. */ 1253 last = NULL; 1254 for (index = 0; index < symbol_map_count; index++) 1255 { 1256 unsigned int index2; 1257 1258 /* Don't bother searching if this symbol 1259 is the same as the previous one. */ 1260 if (last && !strcmp (last, symbol_map[index].file_name)) 1261 continue; 1262 1263 for (index2 = 0; index2 < symtab.len; index2++) 1264 { 1265 if (! symtab.base[index2].mapped) 1266 continue; 1267 1268 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name)) 1269 break; 1270 } 1271 1272 /* If we didn't find it in the symbol table, then it must 1273 be a .o with no text symbols. Output it last. */ 1274 if (index2 == symtab.len) 1275 printf ("%s\n", symbol_map[index].file_name); 1276 last = symbol_map[index].file_name; 1277 } 1278 } 1279