1// Copyright 2014 Google Inc. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package graph collects a set of samples into a directed graph. 16package graph 17 18import ( 19 "fmt" 20 "math" 21 "path/filepath" 22 "regexp" 23 "sort" 24 "strconv" 25 "strings" 26 27 "github.com/google/pprof/profile" 28) 29 30var ( 31 // Removes package name and method arguments for Java method names. 32 // See tests for examples. 33 javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`) 34 // Removes package name and method arguments for Go function names. 35 // See tests for examples. 36 goRegExp = regexp.MustCompile(`^(?:[\w\-\.]+\/)+(.+)`) 37 // Removes potential module versions in a package path. 38 goVerRegExp = regexp.MustCompile(`^(.*?)/v(?:[2-9]|[1-9][0-9]+)([./].*)$`) 39 // Strips C++ namespace prefix from a C++ function / method name. 40 // NOTE: Make sure to keep the template parameters in the name. Normally, 41 // template parameters are stripped from the C++ names but when 42 // -symbolize=demangle=templates flag is used, they will not be. 43 // See tests for examples. 44 cppRegExp = regexp.MustCompile(`^(?:[_a-zA-Z]\w*::)+(_*[A-Z]\w*::~?[_a-zA-Z]\w*(?:<.*>)?)`) 45 cppAnonymousPrefixRegExp = regexp.MustCompile(`^\(anonymous namespace\)::`) 46) 47 48// Graph summarizes a performance profile into a format that is 49// suitable for visualization. 50type Graph struct { 51 Nodes Nodes 52} 53 54// Options encodes the options for constructing a graph 55type Options struct { 56 SampleValue func(s []int64) int64 // Function to compute the value of a sample 57 SampleMeanDivisor func(s []int64) int64 // Function to compute the divisor for mean graphs, or nil 58 FormatTag func(int64, string) string // Function to format a sample tag value into a string 59 ObjNames bool // Always preserve obj filename 60 OrigFnNames bool // Preserve original (eg mangled) function names 61 62 CallTree bool // Build a tree instead of a graph 63 DropNegative bool // Drop nodes with overall negative values 64 65 KeptNodes NodeSet // If non-nil, only use nodes in this set 66} 67 68// Nodes is an ordered collection of graph nodes. 69type Nodes []*Node 70 71// Node is an entry on a profiling report. It represents a unique 72// program location. 73type Node struct { 74 // Info describes the source location associated to this node. 75 Info NodeInfo 76 77 // Function represents the function that this node belongs to. On 78 // graphs with sub-function resolution (eg line number or 79 // addresses), two nodes in a NodeMap that are part of the same 80 // function have the same value of Node.Function. If the Node 81 // represents the whole function, it points back to itself. 82 Function *Node 83 84 // Values associated to this node. Flat is exclusive to this node, 85 // Cum includes all descendents. 86 Flat, FlatDiv, Cum, CumDiv int64 87 88 // In and out Contains the nodes immediately reaching or reached by 89 // this node. 90 In, Out EdgeMap 91 92 // LabelTags provide additional information about subsets of a sample. 93 LabelTags TagMap 94 95 // NumericTags provide additional values for subsets of a sample. 96 // Numeric tags are optionally associated to a label tag. The key 97 // for NumericTags is the name of the LabelTag they are associated 98 // to, or "" for numeric tags not associated to a label tag. 99 NumericTags map[string]TagMap 100} 101 102// FlatValue returns the exclusive value for this node, computing the 103// mean if a divisor is available. 104func (n *Node) FlatValue() int64 { 105 if n.FlatDiv == 0 { 106 return n.Flat 107 } 108 return n.Flat / n.FlatDiv 109} 110 111// CumValue returns the inclusive value for this node, computing the 112// mean if a divisor is available. 113func (n *Node) CumValue() int64 { 114 if n.CumDiv == 0 { 115 return n.Cum 116 } 117 return n.Cum / n.CumDiv 118} 119 120// AddToEdge increases the weight of an edge between two nodes. If 121// there isn't such an edge one is created. 122func (n *Node) AddToEdge(to *Node, v int64, residual, inline bool) { 123 n.AddToEdgeDiv(to, 0, v, residual, inline) 124} 125 126// AddToEdgeDiv increases the weight of an edge between two nodes. If 127// there isn't such an edge one is created. 128func (n *Node) AddToEdgeDiv(to *Node, dv, v int64, residual, inline bool) { 129 if n.Out[to] != to.In[n] { 130 panic(fmt.Errorf("asymmetric edges %v %v", *n, *to)) 131 } 132 133 if e := n.Out[to]; e != nil { 134 e.WeightDiv += dv 135 e.Weight += v 136 if residual { 137 e.Residual = true 138 } 139 if !inline { 140 e.Inline = false 141 } 142 return 143 } 144 145 info := &Edge{Src: n, Dest: to, WeightDiv: dv, Weight: v, Residual: residual, Inline: inline} 146 n.Out[to] = info 147 to.In[n] = info 148} 149 150// NodeInfo contains the attributes for a node. 151type NodeInfo struct { 152 Name string 153 OrigName string 154 Address uint64 155 File string 156 StartLine, Lineno int 157 Objfile string 158} 159 160// PrintableName calls the Node's Formatter function with a single space separator. 161func (i *NodeInfo) PrintableName() string { 162 return strings.Join(i.NameComponents(), " ") 163} 164 165// NameComponents returns the components of the printable name to be used for a node. 166func (i *NodeInfo) NameComponents() []string { 167 var name []string 168 if i.Address != 0 { 169 name = append(name, fmt.Sprintf("%016x", i.Address)) 170 } 171 if fun := i.Name; fun != "" { 172 name = append(name, fun) 173 } 174 175 switch { 176 case i.Lineno != 0: 177 // User requested line numbers, provide what we have. 178 name = append(name, fmt.Sprintf("%s:%d", i.File, i.Lineno)) 179 case i.File != "": 180 // User requested file name, provide it. 181 name = append(name, i.File) 182 case i.Name != "": 183 // User requested function name. It was already included. 184 case i.Objfile != "": 185 // Only binary name is available 186 name = append(name, "["+filepath.Base(i.Objfile)+"]") 187 default: 188 // Do not leave it empty if there is no information at all. 189 name = append(name, "<unknown>") 190 } 191 return name 192} 193 194// NodeMap maps from a node info struct to a node. It is used to merge 195// report entries with the same info. 196type NodeMap map[NodeInfo]*Node 197 198// NodeSet is a collection of node info structs. 199type NodeSet map[NodeInfo]bool 200 201// NodePtrSet is a collection of nodes. Trimming a graph or tree requires a set 202// of objects which uniquely identify the nodes to keep. In a graph, NodeInfo 203// works as a unique identifier; however, in a tree multiple nodes may share 204// identical NodeInfos. A *Node does uniquely identify a node so we can use that 205// instead. Though a *Node also uniquely identifies a node in a graph, 206// currently, during trimming, graphs are rebuilt from scratch using only the 207// NodeSet, so there would not be the required context of the initial graph to 208// allow for the use of *Node. 209type NodePtrSet map[*Node]bool 210 211// FindOrInsertNode takes the info for a node and either returns a matching node 212// from the node map if one exists, or adds one to the map if one does not. 213// If kept is non-nil, nodes are only added if they can be located on it. 214func (nm NodeMap) FindOrInsertNode(info NodeInfo, kept NodeSet) *Node { 215 if kept != nil { 216 if _, ok := kept[info]; !ok { 217 return nil 218 } 219 } 220 221 if n, ok := nm[info]; ok { 222 return n 223 } 224 225 n := &Node{ 226 Info: info, 227 In: make(EdgeMap), 228 Out: make(EdgeMap), 229 LabelTags: make(TagMap), 230 NumericTags: make(map[string]TagMap), 231 } 232 nm[info] = n 233 if info.Address == 0 && info.Lineno == 0 { 234 // This node represents the whole function, so point Function 235 // back to itself. 236 n.Function = n 237 return n 238 } 239 // Find a node that represents the whole function. 240 info.Address = 0 241 info.Lineno = 0 242 n.Function = nm.FindOrInsertNode(info, nil) 243 return n 244} 245 246// EdgeMap is used to represent the incoming/outgoing edges from a node. 247type EdgeMap map[*Node]*Edge 248 249// Edge contains any attributes to be represented about edges in a graph. 250type Edge struct { 251 Src, Dest *Node 252 // The summary weight of the edge 253 Weight, WeightDiv int64 254 255 // residual edges connect nodes that were connected through a 256 // separate node, which has been removed from the report. 257 Residual bool 258 // An inline edge represents a call that was inlined into the caller. 259 Inline bool 260} 261 262// WeightValue returns the weight value for this edge, normalizing if a 263// divisor is available. 264func (e *Edge) WeightValue() int64 { 265 if e.WeightDiv == 0 { 266 return e.Weight 267 } 268 return e.Weight / e.WeightDiv 269} 270 271// Tag represent sample annotations 272type Tag struct { 273 Name string 274 Unit string // Describe the value, "" for non-numeric tags 275 Value int64 276 Flat, FlatDiv int64 277 Cum, CumDiv int64 278} 279 280// FlatValue returns the exclusive value for this tag, computing the 281// mean if a divisor is available. 282func (t *Tag) FlatValue() int64 { 283 if t.FlatDiv == 0 { 284 return t.Flat 285 } 286 return t.Flat / t.FlatDiv 287} 288 289// CumValue returns the inclusive value for this tag, computing the 290// mean if a divisor is available. 291func (t *Tag) CumValue() int64 { 292 if t.CumDiv == 0 { 293 return t.Cum 294 } 295 return t.Cum / t.CumDiv 296} 297 298// TagMap is a collection of tags, classified by their name. 299type TagMap map[string]*Tag 300 301// SortTags sorts a slice of tags based on their weight. 302func SortTags(t []*Tag, flat bool) []*Tag { 303 ts := tags{t, flat} 304 sort.Sort(ts) 305 return ts.t 306} 307 308// New summarizes performance data from a profile into a graph. 309func New(prof *profile.Profile, o *Options) *Graph { 310 if o.CallTree { 311 return newTree(prof, o) 312 } 313 g, _ := newGraph(prof, o) 314 return g 315} 316 317// newGraph computes a graph from a profile. It returns the graph, and 318// a map from the profile location indices to the corresponding graph 319// nodes. 320func newGraph(prof *profile.Profile, o *Options) (*Graph, map[uint64]Nodes) { 321 nodes, locationMap := CreateNodes(prof, o) 322 seenNode := make(map[*Node]bool) 323 seenEdge := make(map[nodePair]bool) 324 for _, sample := range prof.Sample { 325 var w, dw int64 326 w = o.SampleValue(sample.Value) 327 if o.SampleMeanDivisor != nil { 328 dw = o.SampleMeanDivisor(sample.Value) 329 } 330 if dw == 0 && w == 0 { 331 continue 332 } 333 for k := range seenNode { 334 delete(seenNode, k) 335 } 336 for k := range seenEdge { 337 delete(seenEdge, k) 338 } 339 var parent *Node 340 // A residual edge goes over one or more nodes that were not kept. 341 residual := false 342 343 labels := joinLabels(sample) 344 // Group the sample frames, based on a global map. 345 for i := len(sample.Location) - 1; i >= 0; i-- { 346 l := sample.Location[i] 347 locNodes := locationMap[l.ID] 348 for ni := len(locNodes) - 1; ni >= 0; ni-- { 349 n := locNodes[ni] 350 if n == nil { 351 residual = true 352 continue 353 } 354 // Add cum weight to all nodes in stack, avoiding double counting. 355 if _, ok := seenNode[n]; !ok { 356 seenNode[n] = true 357 n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false) 358 } 359 // Update edge weights for all edges in stack, avoiding double counting. 360 if _, ok := seenEdge[nodePair{n, parent}]; !ok && parent != nil && n != parent { 361 seenEdge[nodePair{n, parent}] = true 362 parent.AddToEdgeDiv(n, dw, w, residual, ni != len(locNodes)-1) 363 } 364 parent = n 365 residual = false 366 } 367 } 368 if parent != nil && !residual { 369 // Add flat weight to leaf node. 370 parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true) 371 } 372 } 373 374 return selectNodesForGraph(nodes, o.DropNegative), locationMap 375} 376 377func selectNodesForGraph(nodes Nodes, dropNegative bool) *Graph { 378 // Collect nodes into a graph. 379 gNodes := make(Nodes, 0, len(nodes)) 380 for _, n := range nodes { 381 if n == nil { 382 continue 383 } 384 if n.Cum == 0 && n.Flat == 0 { 385 continue 386 } 387 if dropNegative && isNegative(n) { 388 continue 389 } 390 gNodes = append(gNodes, n) 391 } 392 return &Graph{gNodes} 393} 394 395type nodePair struct { 396 src, dest *Node 397} 398 399func newTree(prof *profile.Profile, o *Options) (g *Graph) { 400 parentNodeMap := make(map[*Node]NodeMap, len(prof.Sample)) 401 for _, sample := range prof.Sample { 402 var w, dw int64 403 w = o.SampleValue(sample.Value) 404 if o.SampleMeanDivisor != nil { 405 dw = o.SampleMeanDivisor(sample.Value) 406 } 407 if dw == 0 && w == 0 { 408 continue 409 } 410 var parent *Node 411 labels := joinLabels(sample) 412 // Group the sample frames, based on a per-node map. 413 for i := len(sample.Location) - 1; i >= 0; i-- { 414 l := sample.Location[i] 415 lines := l.Line 416 if len(lines) == 0 { 417 lines = []profile.Line{{}} // Create empty line to include location info. 418 } 419 for lidx := len(lines) - 1; lidx >= 0; lidx-- { 420 nodeMap := parentNodeMap[parent] 421 if nodeMap == nil { 422 nodeMap = make(NodeMap) 423 parentNodeMap[parent] = nodeMap 424 } 425 n := nodeMap.findOrInsertLine(l, lines[lidx], o) 426 if n == nil { 427 continue 428 } 429 n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false) 430 if parent != nil { 431 parent.AddToEdgeDiv(n, dw, w, false, lidx != len(lines)-1) 432 } 433 parent = n 434 } 435 } 436 if parent != nil { 437 parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true) 438 } 439 } 440 441 nodes := make(Nodes, len(prof.Location)) 442 for _, nm := range parentNodeMap { 443 nodes = append(nodes, nm.nodes()...) 444 } 445 return selectNodesForGraph(nodes, o.DropNegative) 446} 447 448// ShortenFunctionName returns a shortened version of a function's name. 449func ShortenFunctionName(f string) string { 450 f = cppAnonymousPrefixRegExp.ReplaceAllString(f, "") 451 f = goVerRegExp.ReplaceAllString(f, `${1}${2}`) 452 for _, re := range []*regexp.Regexp{goRegExp, javaRegExp, cppRegExp} { 453 if matches := re.FindStringSubmatch(f); len(matches) >= 2 { 454 return strings.Join(matches[1:], "") 455 } 456 } 457 return f 458} 459 460// TrimTree trims a Graph in forest form, keeping only the nodes in kept. This 461// will not work correctly if even a single node has multiple parents. 462func (g *Graph) TrimTree(kept NodePtrSet) { 463 // Creates a new list of nodes 464 oldNodes := g.Nodes 465 g.Nodes = make(Nodes, 0, len(kept)) 466 467 for _, cur := range oldNodes { 468 // A node may not have multiple parents 469 if len(cur.In) > 1 { 470 panic("TrimTree only works on trees") 471 } 472 473 // If a node should be kept, add it to the new list of nodes 474 if _, ok := kept[cur]; ok { 475 g.Nodes = append(g.Nodes, cur) 476 continue 477 } 478 479 // If a node has no parents, then delete all of the in edges of its 480 // children to make them each roots of their own trees. 481 if len(cur.In) == 0 { 482 for _, outEdge := range cur.Out { 483 delete(outEdge.Dest.In, cur) 484 } 485 continue 486 } 487 488 // Get the parent. This works since at this point cur.In must contain only 489 // one element. 490 if len(cur.In) != 1 { 491 panic("Get parent assertion failed. cur.In expected to be of length 1.") 492 } 493 var parent *Node 494 for _, edge := range cur.In { 495 parent = edge.Src 496 } 497 498 parentEdgeInline := parent.Out[cur].Inline 499 500 // Remove the edge from the parent to this node 501 delete(parent.Out, cur) 502 503 // Reconfigure every edge from the current node to now begin at the parent. 504 for _, outEdge := range cur.Out { 505 child := outEdge.Dest 506 507 delete(child.In, cur) 508 child.In[parent] = outEdge 509 parent.Out[child] = outEdge 510 511 outEdge.Src = parent 512 outEdge.Residual = true 513 // If the edge from the parent to the current node and the edge from the 514 // current node to the child are both inline, then this resulting residual 515 // edge should also be inline 516 outEdge.Inline = parentEdgeInline && outEdge.Inline 517 } 518 } 519 g.RemoveRedundantEdges() 520} 521 522func joinLabels(s *profile.Sample) string { 523 if len(s.Label) == 0 { 524 return "" 525 } 526 527 var labels []string 528 for key, vals := range s.Label { 529 for _, v := range vals { 530 labels = append(labels, key+":"+v) 531 } 532 } 533 sort.Strings(labels) 534 return strings.Join(labels, `\n`) 535} 536 537// isNegative returns true if the node is considered as "negative" for the 538// purposes of drop_negative. 539func isNegative(n *Node) bool { 540 switch { 541 case n.Flat < 0: 542 return true 543 case n.Flat == 0 && n.Cum < 0: 544 return true 545 default: 546 return false 547 } 548} 549 550// CreateNodes creates graph nodes for all locations in a profile. It 551// returns set of all nodes, plus a mapping of each location to the 552// set of corresponding nodes (one per location.Line). 553func CreateNodes(prof *profile.Profile, o *Options) (Nodes, map[uint64]Nodes) { 554 locations := make(map[uint64]Nodes, len(prof.Location)) 555 nm := make(NodeMap, len(prof.Location)) 556 for _, l := range prof.Location { 557 lines := l.Line 558 if len(lines) == 0 { 559 lines = []profile.Line{{}} // Create empty line to include location info. 560 } 561 nodes := make(Nodes, len(lines)) 562 for ln := range lines { 563 nodes[ln] = nm.findOrInsertLine(l, lines[ln], o) 564 } 565 locations[l.ID] = nodes 566 } 567 return nm.nodes(), locations 568} 569 570func (nm NodeMap) nodes() Nodes { 571 nodes := make(Nodes, 0, len(nm)) 572 for _, n := range nm { 573 nodes = append(nodes, n) 574 } 575 return nodes 576} 577 578func (nm NodeMap) findOrInsertLine(l *profile.Location, li profile.Line, o *Options) *Node { 579 var objfile string 580 if m := l.Mapping; m != nil && m.File != "" { 581 objfile = m.File 582 } 583 584 if ni := nodeInfo(l, li, objfile, o); ni != nil { 585 return nm.FindOrInsertNode(*ni, o.KeptNodes) 586 } 587 return nil 588} 589 590func nodeInfo(l *profile.Location, line profile.Line, objfile string, o *Options) *NodeInfo { 591 if line.Function == nil { 592 return &NodeInfo{Address: l.Address, Objfile: objfile} 593 } 594 ni := &NodeInfo{ 595 Address: l.Address, 596 Lineno: int(line.Line), 597 Name: line.Function.Name, 598 } 599 if fname := line.Function.Filename; fname != "" { 600 ni.File = filepath.Clean(fname) 601 } 602 if o.OrigFnNames { 603 ni.OrigName = line.Function.SystemName 604 } 605 if o.ObjNames || (ni.Name == "" && ni.OrigName == "") { 606 ni.Objfile = objfile 607 ni.StartLine = int(line.Function.StartLine) 608 } 609 return ni 610} 611 612type tags struct { 613 t []*Tag 614 flat bool 615} 616 617func (t tags) Len() int { return len(t.t) } 618func (t tags) Swap(i, j int) { t.t[i], t.t[j] = t.t[j], t.t[i] } 619func (t tags) Less(i, j int) bool { 620 if !t.flat { 621 if t.t[i].Cum != t.t[j].Cum { 622 return abs64(t.t[i].Cum) > abs64(t.t[j].Cum) 623 } 624 } 625 if t.t[i].Flat != t.t[j].Flat { 626 return abs64(t.t[i].Flat) > abs64(t.t[j].Flat) 627 } 628 return t.t[i].Name < t.t[j].Name 629} 630 631// Sum adds the flat and cum values of a set of nodes. 632func (ns Nodes) Sum() (flat int64, cum int64) { 633 for _, n := range ns { 634 flat += n.Flat 635 cum += n.Cum 636 } 637 return 638} 639 640func (n *Node) addSample(dw, w int64, labels string, numLabel map[string][]int64, numUnit map[string][]string, format func(int64, string) string, flat bool) { 641 // Update sample value 642 if flat { 643 n.FlatDiv += dw 644 n.Flat += w 645 } else { 646 n.CumDiv += dw 647 n.Cum += w 648 } 649 650 // Add string tags 651 if labels != "" { 652 t := n.LabelTags.findOrAddTag(labels, "", 0) 653 if flat { 654 t.FlatDiv += dw 655 t.Flat += w 656 } else { 657 t.CumDiv += dw 658 t.Cum += w 659 } 660 } 661 662 numericTags := n.NumericTags[labels] 663 if numericTags == nil { 664 numericTags = TagMap{} 665 n.NumericTags[labels] = numericTags 666 } 667 // Add numeric tags 668 if format == nil { 669 format = defaultLabelFormat 670 } 671 for k, nvals := range numLabel { 672 units := numUnit[k] 673 for i, v := range nvals { 674 var t *Tag 675 if len(units) > 0 { 676 t = numericTags.findOrAddTag(format(v, units[i]), units[i], v) 677 } else { 678 t = numericTags.findOrAddTag(format(v, k), k, v) 679 } 680 if flat { 681 t.FlatDiv += dw 682 t.Flat += w 683 } else { 684 t.CumDiv += dw 685 t.Cum += w 686 } 687 } 688 } 689} 690 691func defaultLabelFormat(v int64, key string) string { 692 return strconv.FormatInt(v, 10) 693} 694 695func (m TagMap) findOrAddTag(label, unit string, value int64) *Tag { 696 l := m[label] 697 if l == nil { 698 l = &Tag{ 699 Name: label, 700 Unit: unit, 701 Value: value, 702 } 703 m[label] = l 704 } 705 return l 706} 707 708// String returns a text representation of a graph, for debugging purposes. 709func (g *Graph) String() string { 710 var s []string 711 712 nodeIndex := make(map[*Node]int, len(g.Nodes)) 713 714 for i, n := range g.Nodes { 715 nodeIndex[n] = i + 1 716 } 717 718 for i, n := range g.Nodes { 719 name := n.Info.PrintableName() 720 var in, out []int 721 722 for _, from := range n.In { 723 in = append(in, nodeIndex[from.Src]) 724 } 725 for _, to := range n.Out { 726 out = append(out, nodeIndex[to.Dest]) 727 } 728 s = append(s, fmt.Sprintf("%d: %s[flat=%d cum=%d] %x -> %v ", i+1, name, n.Flat, n.Cum, in, out)) 729 } 730 return strings.Join(s, "\n") 731} 732 733// DiscardLowFrequencyNodes returns a set of the nodes at or over a 734// specific cum value cutoff. 735func (g *Graph) DiscardLowFrequencyNodes(nodeCutoff int64) NodeSet { 736 return makeNodeSet(g.Nodes, nodeCutoff) 737} 738 739// DiscardLowFrequencyNodePtrs returns a NodePtrSet of nodes at or over a 740// specific cum value cutoff. 741func (g *Graph) DiscardLowFrequencyNodePtrs(nodeCutoff int64) NodePtrSet { 742 cutNodes := getNodesAboveCumCutoff(g.Nodes, nodeCutoff) 743 kept := make(NodePtrSet, len(cutNodes)) 744 for _, n := range cutNodes { 745 kept[n] = true 746 } 747 return kept 748} 749 750func makeNodeSet(nodes Nodes, nodeCutoff int64) NodeSet { 751 cutNodes := getNodesAboveCumCutoff(nodes, nodeCutoff) 752 kept := make(NodeSet, len(cutNodes)) 753 for _, n := range cutNodes { 754 kept[n.Info] = true 755 } 756 return kept 757} 758 759// getNodesAboveCumCutoff returns all the nodes which have a Cum value greater 760// than or equal to cutoff. 761func getNodesAboveCumCutoff(nodes Nodes, nodeCutoff int64) Nodes { 762 cutoffNodes := make(Nodes, 0, len(nodes)) 763 for _, n := range nodes { 764 if abs64(n.Cum) < nodeCutoff { 765 continue 766 } 767 cutoffNodes = append(cutoffNodes, n) 768 } 769 return cutoffNodes 770} 771 772// TrimLowFrequencyTags removes tags that have less than 773// the specified weight. 774func (g *Graph) TrimLowFrequencyTags(tagCutoff int64) { 775 // Remove nodes with value <= total*nodeFraction 776 for _, n := range g.Nodes { 777 n.LabelTags = trimLowFreqTags(n.LabelTags, tagCutoff) 778 for s, nt := range n.NumericTags { 779 n.NumericTags[s] = trimLowFreqTags(nt, tagCutoff) 780 } 781 } 782} 783 784func trimLowFreqTags(tags TagMap, minValue int64) TagMap { 785 kept := TagMap{} 786 for s, t := range tags { 787 if abs64(t.Flat) >= minValue || abs64(t.Cum) >= minValue { 788 kept[s] = t 789 } 790 } 791 return kept 792} 793 794// TrimLowFrequencyEdges removes edges that have less than 795// the specified weight. Returns the number of edges removed 796func (g *Graph) TrimLowFrequencyEdges(edgeCutoff int64) int { 797 var droppedEdges int 798 for _, n := range g.Nodes { 799 for src, e := range n.In { 800 if abs64(e.Weight) < edgeCutoff { 801 delete(n.In, src) 802 delete(src.Out, n) 803 droppedEdges++ 804 } 805 } 806 } 807 return droppedEdges 808} 809 810// SortNodes sorts the nodes in a graph based on a specific heuristic. 811func (g *Graph) SortNodes(cum bool, visualMode bool) { 812 // Sort nodes based on requested mode 813 switch { 814 case visualMode: 815 // Specialized sort to produce a more visually-interesting graph 816 g.Nodes.Sort(EntropyOrder) 817 case cum: 818 g.Nodes.Sort(CumNameOrder) 819 default: 820 g.Nodes.Sort(FlatNameOrder) 821 } 822} 823 824// SelectTopNodePtrs returns a set of the top maxNodes *Node in a graph. 825func (g *Graph) SelectTopNodePtrs(maxNodes int, visualMode bool) NodePtrSet { 826 set := make(NodePtrSet) 827 for _, node := range g.selectTopNodes(maxNodes, visualMode) { 828 set[node] = true 829 } 830 return set 831} 832 833// SelectTopNodes returns a set of the top maxNodes nodes in a graph. 834func (g *Graph) SelectTopNodes(maxNodes int, visualMode bool) NodeSet { 835 return makeNodeSet(g.selectTopNodes(maxNodes, visualMode), 0) 836} 837 838// selectTopNodes returns a slice of the top maxNodes nodes in a graph. 839func (g *Graph) selectTopNodes(maxNodes int, visualMode bool) Nodes { 840 if maxNodes > 0 { 841 if visualMode { 842 var count int 843 // If generating a visual graph, count tags as nodes. Update 844 // maxNodes to account for them. 845 for i, n := range g.Nodes { 846 tags := countTags(n) 847 if tags > maxNodelets { 848 tags = maxNodelets 849 } 850 if count += tags + 1; count >= maxNodes { 851 maxNodes = i + 1 852 break 853 } 854 } 855 } 856 } 857 if maxNodes > len(g.Nodes) { 858 maxNodes = len(g.Nodes) 859 } 860 return g.Nodes[:maxNodes] 861} 862 863// countTags counts the tags with flat count. This underestimates the 864// number of tags being displayed, but in practice is close enough. 865func countTags(n *Node) int { 866 count := 0 867 for _, e := range n.LabelTags { 868 if e.Flat != 0 { 869 count++ 870 } 871 } 872 for _, t := range n.NumericTags { 873 for _, e := range t { 874 if e.Flat != 0 { 875 count++ 876 } 877 } 878 } 879 return count 880} 881 882// RemoveRedundantEdges removes residual edges if the destination can 883// be reached through another path. This is done to simplify the graph 884// while preserving connectivity. 885func (g *Graph) RemoveRedundantEdges() { 886 // Walk the nodes and outgoing edges in reverse order to prefer 887 // removing edges with the lowest weight. 888 for i := len(g.Nodes); i > 0; i-- { 889 n := g.Nodes[i-1] 890 in := n.In.Sort() 891 for j := len(in); j > 0; j-- { 892 e := in[j-1] 893 if !e.Residual { 894 // Do not remove edges heavier than a non-residual edge, to 895 // avoid potential confusion. 896 break 897 } 898 if isRedundantEdge(e) { 899 delete(e.Src.Out, e.Dest) 900 delete(e.Dest.In, e.Src) 901 } 902 } 903 } 904} 905 906// isRedundantEdge determines if there is a path that allows e.Src 907// to reach e.Dest after removing e. 908func isRedundantEdge(e *Edge) bool { 909 src, n := e.Src, e.Dest 910 seen := map[*Node]bool{n: true} 911 queue := Nodes{n} 912 for len(queue) > 0 { 913 n := queue[0] 914 queue = queue[1:] 915 for _, ie := range n.In { 916 if e == ie || seen[ie.Src] { 917 continue 918 } 919 if ie.Src == src { 920 return true 921 } 922 seen[ie.Src] = true 923 queue = append(queue, ie.Src) 924 } 925 } 926 return false 927} 928 929// nodeSorter is a mechanism used to allow a report to be sorted 930// in different ways. 931type nodeSorter struct { 932 rs Nodes 933 less func(l, r *Node) bool 934} 935 936func (s nodeSorter) Len() int { return len(s.rs) } 937func (s nodeSorter) Swap(i, j int) { s.rs[i], s.rs[j] = s.rs[j], s.rs[i] } 938func (s nodeSorter) Less(i, j int) bool { return s.less(s.rs[i], s.rs[j]) } 939 940// Sort reorders a slice of nodes based on the specified ordering 941// criteria. The result is sorted in decreasing order for (absolute) 942// numeric quantities, alphabetically for text, and increasing for 943// addresses. 944func (ns Nodes) Sort(o NodeOrder) error { 945 var s nodeSorter 946 947 switch o { 948 case FlatNameOrder: 949 s = nodeSorter{ns, 950 func(l, r *Node) bool { 951 if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv { 952 return iv > jv 953 } 954 if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv { 955 return iv < jv 956 } 957 if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv { 958 return iv > jv 959 } 960 return compareNodes(l, r) 961 }, 962 } 963 case FlatCumNameOrder: 964 s = nodeSorter{ns, 965 func(l, r *Node) bool { 966 if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv { 967 return iv > jv 968 } 969 if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv { 970 return iv > jv 971 } 972 if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv { 973 return iv < jv 974 } 975 return compareNodes(l, r) 976 }, 977 } 978 case NameOrder: 979 s = nodeSorter{ns, 980 func(l, r *Node) bool { 981 if iv, jv := l.Info.Name, r.Info.Name; iv != jv { 982 return iv < jv 983 } 984 return compareNodes(l, r) 985 }, 986 } 987 case FileOrder: 988 s = nodeSorter{ns, 989 func(l, r *Node) bool { 990 if iv, jv := l.Info.File, r.Info.File; iv != jv { 991 return iv < jv 992 } 993 if iv, jv := l.Info.StartLine, r.Info.StartLine; iv != jv { 994 return iv < jv 995 } 996 return compareNodes(l, r) 997 }, 998 } 999 case AddressOrder: 1000 s = nodeSorter{ns, 1001 func(l, r *Node) bool { 1002 if iv, jv := l.Info.Address, r.Info.Address; iv != jv { 1003 return iv < jv 1004 } 1005 return compareNodes(l, r) 1006 }, 1007 } 1008 case CumNameOrder, EntropyOrder: 1009 // Hold scoring for score-based ordering 1010 var score map[*Node]int64 1011 scoreOrder := func(l, r *Node) bool { 1012 if iv, jv := abs64(score[l]), abs64(score[r]); iv != jv { 1013 return iv > jv 1014 } 1015 if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv { 1016 return iv < jv 1017 } 1018 if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv { 1019 return iv > jv 1020 } 1021 return compareNodes(l, r) 1022 } 1023 1024 switch o { 1025 case CumNameOrder: 1026 score = make(map[*Node]int64, len(ns)) 1027 for _, n := range ns { 1028 score[n] = n.Cum 1029 } 1030 s = nodeSorter{ns, scoreOrder} 1031 case EntropyOrder: 1032 score = make(map[*Node]int64, len(ns)) 1033 for _, n := range ns { 1034 score[n] = entropyScore(n) 1035 } 1036 s = nodeSorter{ns, scoreOrder} 1037 } 1038 default: 1039 return fmt.Errorf("report: unrecognized sort ordering: %d", o) 1040 } 1041 sort.Sort(s) 1042 return nil 1043} 1044 1045// compareNodes compares two nodes to provide a deterministic ordering 1046// between them. Two nodes cannot have the same Node.Info value. 1047func compareNodes(l, r *Node) bool { 1048 return fmt.Sprint(l.Info) < fmt.Sprint(r.Info) 1049} 1050 1051// entropyScore computes a score for a node representing how important 1052// it is to include this node on a graph visualization. It is used to 1053// sort the nodes and select which ones to display if we have more 1054// nodes than desired in the graph. This number is computed by looking 1055// at the flat and cum weights of the node and the incoming/outgoing 1056// edges. The fundamental idea is to penalize nodes that have a simple 1057// fallthrough from their incoming to the outgoing edge. 1058func entropyScore(n *Node) int64 { 1059 score := float64(0) 1060 1061 if len(n.In) == 0 { 1062 score++ // Favor entry nodes 1063 } else { 1064 score += edgeEntropyScore(n, n.In, 0) 1065 } 1066 1067 if len(n.Out) == 0 { 1068 score++ // Favor leaf nodes 1069 } else { 1070 score += edgeEntropyScore(n, n.Out, n.Flat) 1071 } 1072 1073 return int64(score*float64(n.Cum)) + n.Flat 1074} 1075 1076// edgeEntropyScore computes the entropy value for a set of edges 1077// coming in or out of a node. Entropy (as defined in information 1078// theory) refers to the amount of information encoded by the set of 1079// edges. A set of edges that have a more interesting distribution of 1080// samples gets a higher score. 1081func edgeEntropyScore(n *Node, edges EdgeMap, self int64) float64 { 1082 score := float64(0) 1083 total := self 1084 for _, e := range edges { 1085 if e.Weight > 0 { 1086 total += abs64(e.Weight) 1087 } 1088 } 1089 if total != 0 { 1090 for _, e := range edges { 1091 frac := float64(abs64(e.Weight)) / float64(total) 1092 score += -frac * math.Log2(frac) 1093 } 1094 if self > 0 { 1095 frac := float64(abs64(self)) / float64(total) 1096 score += -frac * math.Log2(frac) 1097 } 1098 } 1099 return score 1100} 1101 1102// NodeOrder sets the ordering for a Sort operation 1103type NodeOrder int 1104 1105// Sorting options for node sort. 1106const ( 1107 FlatNameOrder NodeOrder = iota 1108 FlatCumNameOrder 1109 CumNameOrder 1110 NameOrder 1111 FileOrder 1112 AddressOrder 1113 EntropyOrder 1114) 1115 1116// Sort returns a slice of the edges in the map, in a consistent 1117// order. The sort order is first based on the edge weight 1118// (higher-to-lower) and then by the node names to avoid flakiness. 1119func (e EdgeMap) Sort() []*Edge { 1120 el := make(edgeList, 0, len(e)) 1121 for _, w := range e { 1122 el = append(el, w) 1123 } 1124 1125 sort.Sort(el) 1126 return el 1127} 1128 1129// Sum returns the total weight for a set of nodes. 1130func (e EdgeMap) Sum() int64 { 1131 var ret int64 1132 for _, edge := range e { 1133 ret += edge.Weight 1134 } 1135 return ret 1136} 1137 1138type edgeList []*Edge 1139 1140func (el edgeList) Len() int { 1141 return len(el) 1142} 1143 1144func (el edgeList) Less(i, j int) bool { 1145 if el[i].Weight != el[j].Weight { 1146 return abs64(el[i].Weight) > abs64(el[j].Weight) 1147 } 1148 1149 from1 := el[i].Src.Info.PrintableName() 1150 from2 := el[j].Src.Info.PrintableName() 1151 if from1 != from2 { 1152 return from1 < from2 1153 } 1154 1155 to1 := el[i].Dest.Info.PrintableName() 1156 to2 := el[j].Dest.Info.PrintableName() 1157 1158 return to1 < to2 1159} 1160 1161func (el edgeList) Swap(i, j int) { 1162 el[i], el[j] = el[j], el[i] 1163} 1164 1165func abs64(i int64) int64 { 1166 if i < 0 { 1167 return -i 1168 } 1169 return i 1170} 1171