1 /* cg_print.c - Print routines for displaying call graphs. 2 3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc. 4 5 This file is part of GNU Binutils. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 20 02111-1307, USA. */ 21 22 #include "libiberty.h" 23 #include "gprof.h" 24 #include "search_list.h" 25 #include "source.h" 26 #include "symtab.h" 27 #include "cg_arcs.h" 28 #include "cg_print.h" 29 #include "hist.h" 30 #include "utils.h" 31 #include "corefile.h" 32 33 /* Return value of comparison functions used to sort tables. */ 34 #define LESSTHAN -1 35 #define EQUALTO 0 36 #define GREATERTHAN 1 37 38 static void print_header PARAMS ((void)); 39 static void print_cycle PARAMS ((Sym *)); 40 static int cmp_member PARAMS ((Sym *, Sym *)); 41 static void sort_members PARAMS ((Sym *)); 42 static void print_members PARAMS ((Sym *)); 43 static int cmp_arc PARAMS ((Arc *, Arc *)); 44 static void sort_parents PARAMS ((Sym *)); 45 static void print_parents PARAMS ((Sym *)); 46 static void sort_children PARAMS ((Sym *)); 47 static void print_children PARAMS ((Sym *)); 48 static void print_line PARAMS ((Sym *)); 49 static int cmp_name PARAMS ((const PTR, const PTR)); 50 static int cmp_arc_count PARAMS ((const PTR, const PTR)); 51 static int cmp_fun_nuses PARAMS ((const PTR, const PTR)); 52 static void order_and_dump_functions_by_arcs 53 PARAMS ((Arc **, unsigned long, int, Arc **, unsigned long *)); 54 55 /* Declarations of automatically generated functions to output blurbs. */ 56 extern void bsd_callg_blurb PARAMS ((FILE * fp)); 57 extern void fsf_callg_blurb PARAMS ((FILE * fp)); 58 59 double print_time = 0.0; 60 61 62 static void 63 print_header () 64 { 65 if (first_output) 66 first_output = FALSE; 67 else 68 printf ("\f\n"); 69 70 if (!bsd_style_output) 71 { 72 if (print_descriptions) 73 printf (_("\t\t Call graph (explanation follows)\n\n")); 74 else 75 printf (_("\t\t\tCall graph\n\n")); 76 } 77 78 printf (_("\ngranularity: each sample hit covers %ld byte(s)"), 79 (long) hist_scale * sizeof (UNIT)); 80 81 if (print_time > 0.0) 82 printf (_(" for %.2f%% of %.2f seconds\n\n"), 83 100.0 / print_time, print_time / hz); 84 else 85 { 86 printf (_(" no time propagated\n\n")); 87 88 /* This doesn't hurt, since all the numerators will be 0.0. */ 89 print_time = 1.0; 90 } 91 92 if (bsd_style_output) 93 { 94 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 95 "", "", "", "", _("called"), _("total"), _("parents")); 96 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n", 97 _("index"), _("%time"), _("self"), _("descendants"), 98 _("called"), _("self"), _("name"), _("index")); 99 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 100 "", "", "", "", _("called"), _("total"), _("children")); 101 printf ("\n"); 102 } 103 else 104 { 105 printf (_("index %% time self children called name\n")); 106 } 107 } 108 109 /* Print a cycle header. */ 110 111 static void 112 print_cycle (cyc) 113 Sym *cyc; 114 { 115 char buf[BUFSIZ]; 116 117 sprintf (buf, "[%d]", cyc->cg.index); 118 printf (bsd_style_output 119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu" 120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf, 121 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time, 122 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls); 123 124 if (cyc->cg.self_calls != 0) 125 printf ("+%-7lu", cyc->cg.self_calls); 126 else 127 printf (" %7.7s", ""); 128 129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index); 130 } 131 132 /* Compare LEFT and RIGHT membmer. Major comparison key is 133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */ 134 135 static int 136 cmp_member (left, right) 137 Sym *left; 138 Sym *right; 139 { 140 double left_time = left->cg.prop.self + left->cg.prop.child; 141 double right_time = right->cg.prop.self + right->cg.prop.child; 142 unsigned long left_calls = left->ncalls + left->cg.self_calls; 143 unsigned long right_calls = right->ncalls + right->cg.self_calls; 144 145 if (left_time > right_time) 146 return GREATERTHAN; 147 148 if (left_time < right_time) 149 return LESSTHAN; 150 151 if (left_calls > right_calls) 152 return GREATERTHAN; 153 154 if (left_calls < right_calls) 155 return LESSTHAN; 156 157 return EQUALTO; 158 } 159 160 /* Sort members of a cycle. */ 161 162 static void 163 sort_members (cyc) 164 Sym *cyc; 165 { 166 Sym *todo, *doing, *prev; 167 168 /* Detach cycle members from cyclehead, 169 and insertion sort them back on. */ 170 todo = cyc->cg.cyc.next; 171 cyc->cg.cyc.next = 0; 172 173 for (doing = todo; doing && doing->cg.cyc.next; doing = todo) 174 { 175 todo = doing->cg.cyc.next; 176 177 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next) 178 { 179 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN) 180 break; 181 } 182 183 doing->cg.cyc.next = prev->cg.cyc.next; 184 prev->cg.cyc.next = doing; 185 } 186 } 187 188 /* Print the members of a cycle. */ 189 190 static void 191 print_members (cyc) 192 Sym *cyc; 193 { 194 Sym *member; 195 196 sort_members (cyc); 197 198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next) 199 { 200 printf (bsd_style_output 201 ? "%6.6s %5.5s %7.2f %11.2f %7lu" 202 : "%6.6s %5.5s %7.2f %7.2f %7lu", 203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz, 204 member->ncalls); 205 206 if (member->cg.self_calls != 0) 207 printf ("+%-7lu", member->cg.self_calls); 208 else 209 printf (" %7.7s", ""); 210 211 printf (" "); 212 print_name (member); 213 printf ("\n"); 214 } 215 } 216 217 /* Compare two arcs to/from the same child/parent. 218 - if one arc is a self arc, it's least. 219 - if one arc is within a cycle, it's less than. 220 - if both arcs are within a cycle, compare arc counts. 221 - if neither arc is within a cycle, compare with 222 time + child_time as major key 223 arc count as minor key. */ 224 225 static int 226 cmp_arc (left, right) 227 Arc *left; 228 Arc *right; 229 { 230 Sym *left_parent = left->parent; 231 Sym *left_child = left->child; 232 Sym *right_parent = right->parent; 233 Sym *right_child = right->child; 234 double left_time, right_time; 235 236 DBG (TIMEDEBUG, 237 printf ("[cmp_arc] "); 238 print_name (left_parent); 239 printf (" calls "); 240 print_name (left_child); 241 printf (" %f + %f %lu/%lu\n", left->time, left->child_time, 242 left->count, left_child->ncalls); 243 printf ("[cmp_arc] "); 244 print_name (right_parent); 245 printf (" calls "); 246 print_name (right_child); 247 printf (" %f + %f %lu/%lu\n", right->time, right->child_time, 248 right->count, right_child->ncalls); 249 printf ("\n"); 250 ); 251 252 if (left_parent == left_child) 253 return LESSTHAN; /* Left is a self call. */ 254 255 if (right_parent == right_child) 256 return GREATERTHAN; /* Right is a self call. */ 257 258 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0 259 && left_parent->cg.cyc.num == left_child->cg.cyc.num) 260 { 261 /* Left is a call within a cycle. */ 262 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 263 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 264 { 265 /* Right is a call within the cycle, too. */ 266 if (left->count < right->count) 267 return LESSTHAN; 268 269 if (left->count > right->count) 270 return GREATERTHAN; 271 272 return EQUALTO; 273 } 274 else 275 { 276 /* Right isn't a call within the cycle. */ 277 return LESSTHAN; 278 } 279 } 280 else 281 { 282 /* Left isn't a call within a cycle. */ 283 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 284 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 285 { 286 /* Right is a call within a cycle. */ 287 return GREATERTHAN; 288 } 289 else 290 { 291 /* Neither is a call within a cycle. */ 292 left_time = left->time + left->child_time; 293 right_time = right->time + right->child_time; 294 295 if (left_time < right_time) 296 return LESSTHAN; 297 298 if (left_time > right_time) 299 return GREATERTHAN; 300 301 if (left->count < right->count) 302 return LESSTHAN; 303 304 if (left->count > right->count) 305 return GREATERTHAN; 306 307 return EQUALTO; 308 } 309 } 310 } 311 312 313 static void 314 sort_parents (child) 315 Sym * child; 316 { 317 Arc *arc, *detached, sorted, *prev; 318 319 /* Unlink parents from child, then insertion sort back on to 320 sorted's parents. 321 *arc the arc you have detached and are inserting. 322 *detached the rest of the arcs to be sorted. 323 sorted arc list onto which you insertion sort. 324 *prev arc before the arc you are comparing. */ 325 sorted.next_parent = 0; 326 327 for (arc = child->cg.parents; arc; arc = detached) 328 { 329 detached = arc->next_parent; 330 331 /* Consider *arc as disconnected; insert it into sorted. */ 332 for (prev = &sorted; prev->next_parent; prev = prev->next_parent) 333 { 334 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN) 335 break; 336 } 337 338 arc->next_parent = prev->next_parent; 339 prev->next_parent = arc; 340 } 341 342 /* Reattach sorted arcs to child. */ 343 child->cg.parents = sorted.next_parent; 344 } 345 346 347 static void 348 print_parents (child) 349 Sym *child; 350 { 351 Sym *parent; 352 Arc *arc; 353 Sym *cycle_head; 354 355 if (child->cg.cyc.head != 0) 356 cycle_head = child->cg.cyc.head; 357 else 358 cycle_head = child; 359 360 if (!child->cg.parents) 361 { 362 printf (bsd_style_output 363 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n") 364 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"), 365 "", "", "", "", "", ""); 366 return; 367 } 368 369 sort_parents (child); 370 371 for (arc = child->cg.parents; arc; arc = arc->next_parent) 372 { 373 parent = arc->parent; 374 if (child == parent || (child->cg.cyc.num != 0 375 && parent->cg.cyc.num == child->cg.cyc.num)) 376 { 377 /* Selfcall or call among siblings. */ 378 printf (bsd_style_output 379 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 380 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 381 "", "", "", "", 382 arc->count, ""); 383 print_name (parent); 384 printf ("\n"); 385 } 386 else 387 { 388 /* Regular parent of child. */ 389 printf (bsd_style_output 390 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 391 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 392 "", "", 393 arc->time / hz, arc->child_time / hz, 394 arc->count, cycle_head->ncalls); 395 print_name (parent); 396 printf ("\n"); 397 } 398 } 399 } 400 401 402 static void 403 sort_children (parent) 404 Sym *parent; 405 { 406 Arc *arc, *detached, sorted, *prev; 407 408 /* Unlink children from parent, then insertion sort back on to 409 sorted's children. 410 *arc the arc you have detached and are inserting. 411 *detached the rest of the arcs to be sorted. 412 sorted arc list onto which you insertion sort. 413 *prev arc before the arc you are comparing. */ 414 sorted.next_child = 0; 415 416 for (arc = parent->cg.children; arc; arc = detached) 417 { 418 detached = arc->next_child; 419 420 /* Consider *arc as disconnected; insert it into sorted. */ 421 for (prev = &sorted; prev->next_child; prev = prev->next_child) 422 { 423 if (cmp_arc (arc, prev->next_child) != LESSTHAN) 424 break; 425 } 426 427 arc->next_child = prev->next_child; 428 prev->next_child = arc; 429 } 430 431 /* Reattach sorted children to parent. */ 432 parent->cg.children = sorted.next_child; 433 } 434 435 436 static void 437 print_children (parent) 438 Sym *parent; 439 { 440 Sym *child; 441 Arc *arc; 442 443 sort_children (parent); 444 arc = parent->cg.children; 445 446 for (arc = parent->cg.children; arc; arc = arc->next_child) 447 { 448 child = arc->child; 449 if (child == parent || (child->cg.cyc.num != 0 450 && child->cg.cyc.num == parent->cg.cyc.num)) 451 { 452 /* Self call or call to sibling. */ 453 printf (bsd_style_output 454 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 455 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 456 "", "", "", "", arc->count, ""); 457 print_name (child); 458 printf ("\n"); 459 } 460 else 461 { 462 /* Regular child of parent. */ 463 printf (bsd_style_output 464 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 465 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 466 "", "", 467 arc->time / hz, arc->child_time / hz, 468 arc->count, child->cg.cyc.head->ncalls); 469 print_name (child); 470 printf ("\n"); 471 } 472 } 473 } 474 475 476 static void 477 print_line (np) 478 Sym *np; 479 { 480 char buf[BUFSIZ]; 481 482 sprintf (buf, "[%d]", np->cg.index); 483 printf (bsd_style_output 484 ? "%-6.6s %5.1f %7.2f %11.2f" 485 : "%-6.6s %5.1f %7.2f %7.2f", buf, 486 100 * (np->cg.prop.self + np->cg.prop.child) / print_time, 487 np->cg.prop.self / hz, np->cg.prop.child / hz); 488 489 if ((np->ncalls + np->cg.self_calls) != 0) 490 { 491 printf (" %7lu", np->ncalls); 492 493 if (np->cg.self_calls != 0) 494 printf ("+%-7lu ", np->cg.self_calls); 495 else 496 printf (" %7.7s ", ""); 497 } 498 else 499 { 500 printf (" %7.7s %7.7s ", "", ""); 501 } 502 503 print_name (np); 504 printf ("\n"); 505 } 506 507 508 /* Print dynamic call graph. */ 509 510 void 511 cg_print (timesortsym) 512 Sym ** timesortsym; 513 { 514 unsigned int index; 515 Sym *parent; 516 517 if (print_descriptions && bsd_style_output) 518 bsd_callg_blurb (stdout); 519 520 print_header (); 521 522 for (index = 0; index < symtab.len + num_cycles; ++index) 523 { 524 parent = timesortsym[index]; 525 526 if ((ignore_zeros && parent->ncalls == 0 527 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0 528 && parent->cg.prop.child == 0) 529 || !parent->cg.print_flag 530 || (line_granularity && ! parent->is_func)) 531 continue; 532 533 if (!parent->name && parent->cg.cyc.num != 0) 534 { 535 /* Cycle header. */ 536 print_cycle (parent); 537 print_members (parent); 538 } 539 else 540 { 541 print_parents (parent); 542 print_line (parent); 543 print_children (parent); 544 } 545 546 if (bsd_style_output) 547 printf ("\n"); 548 549 printf ("-----------------------------------------------\n"); 550 551 if (bsd_style_output) 552 printf ("\n"); 553 } 554 555 free (timesortsym); 556 557 if (print_descriptions && !bsd_style_output) 558 fsf_callg_blurb (stdout); 559 } 560 561 562 static int 563 cmp_name (left, right) 564 const PTR left; 565 const PTR right; 566 { 567 const Sym **npp1 = (const Sym **) left; 568 const Sym **npp2 = (const Sym **) right; 569 570 return strcmp ((*npp1)->name, (*npp2)->name); 571 } 572 573 574 void 575 cg_print_index () 576 { 577 unsigned int index; 578 unsigned int nnames, todo, i, j; 579 int col, starting_col; 580 Sym **name_sorted_syms, *sym; 581 const char *filename; 582 char buf[20]; 583 int column_width = (output_width - 1) / 3; /* Don't write in last col! */ 584 585 /* Now, sort regular function name 586 alphabetically to create an index. */ 587 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *)); 588 589 for (index = 0, nnames = 0; index < symtab.len; index++) 590 { 591 if (ignore_zeros && symtab.base[index].ncalls == 0 592 && symtab.base[index].hist.time == 0) 593 continue; 594 595 name_sorted_syms[nnames++] = &symtab.base[index]; 596 } 597 598 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name); 599 600 for (index = 1, todo = nnames; index <= num_cycles; index++) 601 name_sorted_syms[todo++] = &cycle_header[index]; 602 603 printf ("\f\n"); 604 printf (_("Index by function name\n\n")); 605 index = (todo + 2) / 3; 606 607 for (i = 0; i < index; i++) 608 { 609 col = 0; 610 starting_col = 0; 611 612 for (j = i; j < todo; j += index) 613 { 614 sym = name_sorted_syms[j]; 615 616 if (sym->cg.print_flag) 617 sprintf (buf, "[%d]", sym->cg.index); 618 else 619 sprintf (buf, "(%d)", sym->cg.index); 620 621 if (j < nnames) 622 { 623 if (bsd_style_output) 624 { 625 printf ("%6.6s %-19.19s", buf, sym->name); 626 } 627 else 628 { 629 col += strlen (buf); 630 631 for (; col < starting_col + 5; ++col) 632 putchar (' '); 633 634 printf (" %s ", buf); 635 col += print_name_only (sym); 636 637 if (!line_granularity && sym->is_static && sym->file) 638 { 639 filename = sym->file->name; 640 641 if (!print_path) 642 { 643 filename = strrchr (filename, '/'); 644 645 if (filename) 646 ++filename; 647 else 648 filename = sym->file->name; 649 } 650 651 printf (" (%s)", filename); 652 col += strlen (filename) + 3; 653 } 654 } 655 } 656 else 657 { 658 if (bsd_style_output) 659 { 660 printf ("%6.6s ", buf); 661 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 662 printf ("%-19.19s", buf); 663 } 664 else 665 { 666 col += strlen (buf); 667 for (; col < starting_col + 5; ++col) 668 putchar (' '); 669 printf (" %s ", buf); 670 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 671 printf ("%s", buf); 672 col += strlen (buf); 673 } 674 } 675 676 starting_col += column_width; 677 } 678 679 printf ("\n"); 680 } 681 682 free (name_sorted_syms); 683 } 684 685 /* Compare two arcs based on their usage counts. 686 We want to sort in descending order. */ 687 688 static int 689 cmp_arc_count (left, right) 690 const PTR left; 691 const PTR right; 692 { 693 const Arc **npp1 = (const Arc **) left; 694 const Arc **npp2 = (const Arc **) right; 695 696 if ((*npp1)->count > (*npp2)->count) 697 return -1; 698 else if ((*npp1)->count < (*npp2)->count) 699 return 1; 700 else 701 return 0; 702 } 703 704 /* Compare two funtions based on their usage counts. 705 We want to sort in descending order. */ 706 707 static int 708 cmp_fun_nuses (left, right) 709 const PTR left; 710 const PTR right; 711 { 712 const Sym **npp1 = (const Sym **) left; 713 const Sym **npp2 = (const Sym **) right; 714 715 if ((*npp1)->nuses > (*npp2)->nuses) 716 return -1; 717 else if ((*npp1)->nuses < (*npp2)->nuses) 718 return 1; 719 else 720 return 0; 721 } 722 723 /* Print a suggested function ordering based on the profiling data. 724 725 We perform 4 major steps when ordering functions: 726 727 * Group unused functions together and place them at the 728 end of the function order. 729 730 * Search the highest use arcs (those which account for 90% of 731 the total arc count) for functions which have several parents. 732 733 Group those with the most call sites together (currently the 734 top 1.25% which have at least five different call sites). 735 736 These are emitted at the start of the function order. 737 738 * Use a greedy placement algorithm to place functions which 739 occur in the top 99% of the arcs in the profile. Some provisions 740 are made to handle high usage arcs where the parent and/or 741 child has already been placed. 742 743 * Run the same greedy placement algorithm on the remaining 744 arcs to place the leftover functions. 745 746 747 The various "magic numbers" should (one day) be tuneable by command 748 line options. They were arrived at by benchmarking a few applications 749 with various values to see which values produced better overall function 750 orderings. 751 752 Of course, profiling errors, machine limitations (PA long calls), and 753 poor cutoff values for the placement algorithm may limit the usefullness 754 of the resulting function order. Improvements would be greatly appreciated. 755 756 Suggestions: 757 758 * Place the functions with many callers near the middle of the 759 list to reduce long calls. 760 761 * Propagate arc usage changes as functions are placed. Ie if 762 func1 and func2 are placed together, arcs to/from those arcs 763 to the same parent/child should be combined, then resort the 764 arcs to choose the next one. 765 766 * Implement some global positioning algorithm to place the 767 chains made by the greedy local positioning algorithm. Probably 768 by examining arcs which haven't been placed yet to tie two 769 chains together. 770 771 * Take a function's size and time into account in the algorithm; 772 size in particular is important on the PA (long calls). Placing 773 many small functions onto their own page may be wise. 774 775 * Use better profiling information; many published algorithms 776 are based on call sequences through time, rather than just 777 arc counts. 778 779 * Prodecure cloning could improve performance when a small number 780 of arcs account for most of the calls to a particular function. 781 782 * Use relocation information to avoid moving unused functions 783 completely out of the code stream; this would avoid severe lossage 784 when the profile data bears little resemblance to actual runs. 785 786 * Propagation of arc usages should also improve .o link line 787 ordering which shares the same arc placement algorithm with 788 the function ordering code (in fact it is a degenerate case 789 of function ordering). */ 790 791 void 792 cg_print_function_ordering () 793 { 794 unsigned long index, used, unused, scratch_index; 795 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count; 796 #ifdef __GNUC__ 797 unsigned long long total_arcs, tmp_arcs_count; 798 #else 799 unsigned long total_arcs, tmp_arcs_count; 800 #endif 801 Sym **unused_syms, **used_syms, **scratch_syms; 802 Arc **unplaced_arcs, **high_arcs, **scratch_arcs; 803 804 index = 0; 805 used = 0; 806 unused = 0; 807 scratch_index = 0; 808 unplaced_arc_count = 0; 809 high_arc_count = 0; 810 scratch_arc_count = 0; 811 812 /* First group all the unused functions together. */ 813 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 814 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 815 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 816 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 817 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 818 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 819 820 /* Walk through all the functions; mark those which are never 821 called as placed (we'll emit them as a group later). */ 822 for (index = 0, used = 0, unused = 0; index < symtab.len; index++) 823 { 824 if (symtab.base[index].ncalls == 0) 825 { 826 /* Filter out gprof generated names. */ 827 if (strcmp (symtab.base[index].name, "<locore>") 828 && strcmp (symtab.base[index].name, "<hicore>")) 829 { 830 unused_syms[unused++] = &symtab.base[index]; 831 symtab.base[index].has_been_placed = 1; 832 } 833 } 834 else 835 { 836 used_syms[used++] = &symtab.base[index]; 837 symtab.base[index].has_been_placed = 0; 838 symtab.base[index].next = 0; 839 symtab.base[index].prev = 0; 840 symtab.base[index].nuses = 0; 841 } 842 } 843 844 /* Sort the arcs from most used to least used. */ 845 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count); 846 847 /* Compute the total arc count. Also mark arcs as unplaced. 848 849 Note we don't compensate for overflow if that happens! 850 Overflow is much less likely when this file is compiled 851 with GCC as it can double-wide integers via long long. */ 852 total_arcs = 0; 853 for (index = 0; index < numarcs; index++) 854 { 855 total_arcs += arcs[index]->count; 856 arcs[index]->has_been_placed = 0; 857 } 858 859 /* We want to pull out those functions which are referenced 860 by many highly used arcs and emit them as a group. This 861 could probably use some tuning. */ 862 tmp_arcs_count = 0; 863 for (index = 0; index < numarcs; index++) 864 { 865 tmp_arcs_count += arcs[index]->count; 866 867 /* Count how many times each parent and child are used up 868 to our threshhold of arcs (90%). */ 869 if ((double)tmp_arcs_count / (double)total_arcs > 0.90) 870 break; 871 872 arcs[index]->child->nuses++; 873 } 874 875 /* Now sort a temporary symbol table based on the number of 876 times each function was used in the highest used arcs. */ 877 memcpy (scratch_syms, used_syms, used * sizeof (Sym *)); 878 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses); 879 880 /* Now pick out those symbols we're going to emit as 881 a group. We take up to 1.25% of the used symbols. */ 882 for (index = 0; index < used / 80; index++) 883 { 884 Sym *sym = scratch_syms[index]; 885 Arc *arc; 886 887 /* If we hit symbols that aren't used from many call sites, 888 then we can quit. We choose five as the low limit for 889 no particular reason. */ 890 if (sym->nuses == 5) 891 break; 892 893 /* We're going to need the arcs between these functions. 894 Unfortunately, we don't know all these functions 895 until we're done. So we keep track of all the arcs 896 to the functions we care about, then prune out those 897 which are uninteresting. 898 899 An interesting variation would be to quit when we found 900 multi-call site functions which account for some percentage 901 of the arcs. */ 902 arc = sym->cg.children; 903 904 while (arc) 905 { 906 if (arc->parent != arc->child) 907 scratch_arcs[scratch_arc_count++] = arc; 908 arc->has_been_placed = 1; 909 arc = arc->next_child; 910 } 911 912 arc = sym->cg.parents; 913 914 while (arc) 915 { 916 if (arc->parent != arc->child) 917 scratch_arcs[scratch_arc_count++] = arc; 918 arc->has_been_placed = 1; 919 arc = arc->next_parent; 920 } 921 922 /* Keep track of how many symbols we're going to place. */ 923 scratch_index = index; 924 925 /* A lie, but it makes identifying 926 these functions easier later. */ 927 sym->has_been_placed = 1; 928 } 929 930 /* Now walk through the temporary arcs and copy 931 those we care about into the high arcs array. */ 932 for (index = 0; index < scratch_arc_count; index++) 933 { 934 Arc *arc = scratch_arcs[index]; 935 936 /* If this arc refers to highly used functions, then 937 then we want to keep it. */ 938 if (arc->child->has_been_placed 939 && arc->parent->has_been_placed) 940 { 941 high_arcs[high_arc_count++] = scratch_arcs[index]; 942 943 /* We need to turn of has_been_placed since we're going to 944 use the main arc placement algorithm on these arcs. */ 945 arc->child->has_been_placed = 0; 946 arc->parent->has_been_placed = 0; 947 } 948 } 949 950 /* Dump the multi-site high usage functions which are not 951 going to be ordered by the main ordering algorithm. */ 952 for (index = 0; index < scratch_index; index++) 953 { 954 if (scratch_syms[index]->has_been_placed) 955 printf ("%s\n", scratch_syms[index]->name); 956 } 957 958 /* Now we can order the multi-site high use 959 functions based on the arcs between them. */ 960 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count); 961 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1, 962 unplaced_arcs, &unplaced_arc_count); 963 964 /* Order and dump the high use functions left, 965 these typically have only a few call sites. */ 966 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 967 unplaced_arcs, &unplaced_arc_count); 968 969 /* Now place the rarely used functions. */ 970 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1, 971 scratch_arcs, &scratch_arc_count); 972 973 /* Output any functions not emitted by the order_and_dump calls. */ 974 for (index = 0; index < used; index++) 975 if (used_syms[index]->has_been_placed == 0) 976 printf("%s\n", used_syms[index]->name); 977 978 /* Output the unused functions. */ 979 for (index = 0; index < unused; index++) 980 printf("%s\n", unused_syms[index]->name); 981 982 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 983 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 984 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 985 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 986 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 987 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 988 989 free (unused_syms); 990 free (used_syms); 991 free (scratch_syms); 992 free (high_arcs); 993 free (scratch_arcs); 994 free (unplaced_arcs); 995 } 996 997 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries; 998 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT. 999 1000 If ALL is nonzero, then place all functions referenced by THE_ARCS, 1001 else only place those referenced in the top 99% of the arcs in THE_ARCS. */ 1002 1003 #define MOST 0.99 1004 static void 1005 order_and_dump_functions_by_arcs (the_arcs, arc_count, all, 1006 unplaced_arcs, unplaced_arc_count) 1007 Arc **the_arcs; 1008 unsigned long arc_count; 1009 int all; 1010 Arc **unplaced_arcs; 1011 unsigned long *unplaced_arc_count; 1012 { 1013 #ifdef __GNUC__ 1014 unsigned long long tmp_arcs, total_arcs; 1015 #else 1016 unsigned long tmp_arcs, total_arcs; 1017 #endif 1018 unsigned int index; 1019 1020 /* If needed, compute the total arc count. 1021 1022 Note we don't compensate for overflow if that happens! */ 1023 if (! all) 1024 { 1025 total_arcs = 0; 1026 for (index = 0; index < arc_count; index++) 1027 total_arcs += the_arcs[index]->count; 1028 } 1029 else 1030 total_arcs = 0; 1031 1032 tmp_arcs = 0; 1033 1034 for (index = 0; index < arc_count; index++) 1035 { 1036 Sym *sym1, *sym2; 1037 Sym *child, *parent; 1038 1039 tmp_arcs += the_arcs[index]->count; 1040 1041 /* Ignore this arc if it's already been placed. */ 1042 if (the_arcs[index]->has_been_placed) 1043 continue; 1044 1045 child = the_arcs[index]->child; 1046 parent = the_arcs[index]->parent; 1047 1048 /* If we're not using all arcs, and this is a rarely used 1049 arc, then put it on the unplaced_arc list. Similarly 1050 if both the parent and child of this arc have been placed. */ 1051 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST) 1052 || child->has_been_placed || parent->has_been_placed) 1053 { 1054 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1055 continue; 1056 } 1057 1058 /* If all slots in the parent and child are full, then there isn't 1059 anything we can do right now. We'll place this arc on the 1060 unplaced arc list in the hope that a global positioning 1061 algorithm can use it to place function chains. */ 1062 if (parent->next && parent->prev && child->next && child->prev) 1063 { 1064 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1065 continue; 1066 } 1067 1068 /* If the parent is unattached, then find the closest 1069 place to attach it onto child's chain. Similarly 1070 for the opposite case. */ 1071 if (!parent->next && !parent->prev) 1072 { 1073 int next_count = 0; 1074 int prev_count = 0; 1075 Sym *prev = child; 1076 Sym *next = child; 1077 1078 /* Walk to the beginning and end of the child's chain. */ 1079 while (next->next) 1080 { 1081 next = next->next; 1082 next_count++; 1083 } 1084 1085 while (prev->prev) 1086 { 1087 prev = prev->prev; 1088 prev_count++; 1089 } 1090 1091 /* Choose the closest. */ 1092 child = next_count < prev_count ? next : prev; 1093 } 1094 else if (! child->next && !child->prev) 1095 { 1096 int next_count = 0; 1097 int prev_count = 0; 1098 Sym *prev = parent; 1099 Sym *next = parent; 1100 1101 while (next->next) 1102 { 1103 next = next->next; 1104 next_count++; 1105 } 1106 1107 while (prev->prev) 1108 { 1109 prev = prev->prev; 1110 prev_count++; 1111 } 1112 1113 parent = prev_count < next_count ? prev : next; 1114 } 1115 else 1116 { 1117 /* Couldn't find anywhere to attach the functions, 1118 put the arc on the unplaced arc list. */ 1119 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1120 continue; 1121 } 1122 1123 /* Make sure we don't tie two ends together. */ 1124 sym1 = parent; 1125 if (sym1->next) 1126 while (sym1->next) 1127 sym1 = sym1->next; 1128 else 1129 while (sym1->prev) 1130 sym1 = sym1->prev; 1131 1132 sym2 = child; 1133 if (sym2->next) 1134 while (sym2->next) 1135 sym2 = sym2->next; 1136 else 1137 while (sym2->prev) 1138 sym2 = sym2->prev; 1139 1140 if (sym1 == child 1141 && sym2 == parent) 1142 { 1143 /* This would tie two ends together. */ 1144 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index]; 1145 continue; 1146 } 1147 1148 if (parent->next) 1149 { 1150 /* Must attach to the parent's prev field. */ 1151 if (! child->next) 1152 { 1153 /* parent-prev and child-next */ 1154 parent->prev = child; 1155 child->next = parent; 1156 the_arcs[index]->has_been_placed = 1; 1157 } 1158 } 1159 else if (parent->prev) 1160 { 1161 /* Must attach to the parent's next field. */ 1162 if (! child->prev) 1163 { 1164 /* parent-next and child-prev */ 1165 parent->next = child; 1166 child->prev = parent; 1167 the_arcs[index]->has_been_placed = 1; 1168 } 1169 } 1170 else 1171 { 1172 /* Can attach to either field in the parent, depends 1173 on where we've got space in the child. */ 1174 if (child->prev) 1175 { 1176 /* parent-prev and child-next. */ 1177 parent->prev = child; 1178 child->next = parent; 1179 the_arcs[index]->has_been_placed = 1; 1180 } 1181 else 1182 { 1183 /* parent-next and child-prev. */ 1184 parent->next = child; 1185 child->prev = parent; 1186 the_arcs[index]->has_been_placed = 1; 1187 } 1188 } 1189 } 1190 1191 /* Dump the chains of functions we've made. */ 1192 for (index = 0; index < arc_count; index++) 1193 { 1194 Sym *sym; 1195 if (the_arcs[index]->parent->has_been_placed 1196 || the_arcs[index]->child->has_been_placed) 1197 continue; 1198 1199 sym = the_arcs[index]->parent; 1200 1201 /* If this symbol isn't attached to any other 1202 symbols, then we've got a rarely used arc. 1203 1204 Skip it for now, we'll deal with them later. */ 1205 if (sym->next == NULL 1206 && sym->prev == NULL) 1207 continue; 1208 1209 /* Get to the start of this chain. */ 1210 while (sym->prev) 1211 sym = sym->prev; 1212 1213 while (sym) 1214 { 1215 /* Mark it as placed. */ 1216 sym->has_been_placed = 1; 1217 printf ("%s\n", sym->name); 1218 sym = sym->next; 1219 } 1220 } 1221 1222 /* If we want to place all the arcs, then output 1223 those which weren't placed by the main algorithm. */ 1224 if (all) 1225 for (index = 0; index < arc_count; index++) 1226 { 1227 Sym *sym; 1228 if (the_arcs[index]->parent->has_been_placed 1229 || the_arcs[index]->child->has_been_placed) 1230 continue; 1231 1232 sym = the_arcs[index]->parent; 1233 1234 sym->has_been_placed = 1; 1235 printf ("%s\n", sym->name); 1236 } 1237 } 1238 1239 /* Print a suggested .o ordering for files on a link line based 1240 on profiling information. This uses the function placement 1241 code for the bulk of its work. */ 1242 1243 void 1244 cg_print_file_ordering () 1245 { 1246 unsigned long scratch_arc_count, index; 1247 Arc **scratch_arcs; 1248 char *last; 1249 1250 scratch_arc_count = 0; 1251 1252 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 1253 for (index = 0; index < numarcs; index++) 1254 { 1255 if (! arcs[index]->parent->mapped 1256 || ! arcs[index]->child->mapped) 1257 arcs[index]->has_been_placed = 1; 1258 } 1259 1260 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 1261 scratch_arcs, &scratch_arc_count); 1262 1263 /* Output .o's not handled by the main placement algorithm. */ 1264 for (index = 0; index < symtab.len; index++) 1265 { 1266 if (symtab.base[index].mapped 1267 && ! symtab.base[index].has_been_placed) 1268 printf ("%s\n", symtab.base[index].name); 1269 } 1270 1271 /* Now output any .o's that didn't have any text symbols. */ 1272 last = NULL; 1273 for (index = 0; index < symbol_map_count; index++) 1274 { 1275 unsigned int index2; 1276 1277 /* Don't bother searching if this symbol 1278 is the same as the previous one. */ 1279 if (last && !strcmp (last, symbol_map[index].file_name)) 1280 continue; 1281 1282 for (index2 = 0; index2 < symtab.len; index2++) 1283 { 1284 if (! symtab.base[index2].mapped) 1285 continue; 1286 1287 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name)) 1288 break; 1289 } 1290 1291 /* If we didn't find it in the symbol table, then it must 1292 be a .o with no text symbols. Output it last. */ 1293 if (index2 == symtab.len) 1294 printf ("%s\n", symbol_map[index].file_name); 1295 last = symbol_map[index].file_name; 1296 } 1297 } 1298