1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package pointer 6 7// This file implements Hash-Value Numbering (HVN), a pre-solver 8// constraint optimization described in Hardekopf & Lin, SAS'07 (see 9// doc.go) that analyses the graph topology to determine which sets of 10// variables are "pointer equivalent" (PE), i.e. must have identical 11// points-to sets in the solution. 12// 13// A separate ("offline") graph is constructed. Its nodes are those of 14// the main-graph, plus an additional node *X for each pointer node X. 15// With this graph we can reason about the unknown points-to set of 16// dereferenced pointers. (We do not generalize this to represent 17// unknown fields x->f, perhaps because such fields would be numerous, 18// though it might be worth an experiment.) 19// 20// Nodes whose points-to relations are not entirely captured by the 21// graph are marked as "indirect": the *X nodes, the parameters of 22// address-taken functions (which includes all functions in method 23// sets), or nodes updated by the solver rules for reflection, etc. 24// 25// All addr (y=&x) nodes are initially assigned a pointer-equivalence 26// (PE) label equal to x's nodeid in the main graph. (These are the 27// only PE labels that are less than len(a.nodes).) 28// 29// All offsetAddr (y=&x.f) constraints are initially assigned a PE 30// label; such labels are memoized, keyed by (x, f), so that equivalent 31// nodes y as assigned the same label. 32// 33// Then we process each strongly connected component (SCC) of the graph 34// in topological order, assigning it a PE label based on the set P of 35// PE labels that flow to it from its immediate dependencies. 36// 37// If any node in P is "indirect", the entire SCC is assigned a fresh PE 38// label. Otherwise: 39// 40// |P|=0 if P is empty, all nodes in the SCC are non-pointers (e.g. 41// uninitialized variables, or formal params of dead functions) 42// and the SCC is assigned the PE label of zero. 43// 44// |P|=1 if P is a singleton, the SCC is assigned the same label as the 45// sole element of P. 46// 47// |P|>1 if P contains multiple labels, a unique label representing P is 48// invented and recorded in an hash table, so that other 49// equivalent SCCs may also be assigned this label, akin to 50// conventional hash-value numbering in a compiler. 51// 52// Finally, a renumbering is computed such that each node is replaced by 53// the lowest-numbered node with the same PE label. All constraints are 54// renumbered, and any resulting duplicates are eliminated. 55// 56// The only nodes that are not renumbered are the objects x in addr 57// (y=&x) constraints, since the ids of these nodes (and fields derived 58// from them via offsetAddr rules) are the elements of all points-to 59// sets, so they must remain as they are if we want the same solution. 60// 61// The solverStates (node.solve) for nodes in the same equivalence class 62// are linked together so that all nodes in the class have the same 63// solution. This avoids the need to renumber nodeids buried in 64// Queries, cgnodes, etc (like (*analysis).renumber() does) since only 65// the solution is needed. 66// 67// The result of HVN is that the number of distinct nodes and 68// constraints is reduced, but the solution is identical (almost---see 69// CROSS-CHECK below). In particular, both linear and cyclic chains of 70// copies are each replaced by a single node. 71// 72// Nodes and constraints created "online" (e.g. while solving reflection 73// constraints) are not subject to this optimization. 74// 75// PERFORMANCE 76// 77// In two benchmarks (guru and godoc), HVN eliminates about two thirds 78// of nodes, the majority accounted for by non-pointers: nodes of 79// non-pointer type, pointers that remain nil, formal parameters of dead 80// functions, nodes of untracked types, etc. It also reduces the number 81// of constraints, also by about two thirds, and the solving time by 82// 30--42%, although we must pay about 15% for the running time of HVN 83// itself. The benefit is greater for larger applications. 84// 85// There are many possible optimizations to improve the performance: 86// * Use fewer than 1:1 onodes to main graph nodes: many of the onodes 87// we create are not needed. 88// * HU (HVN with Union---see paper): coalesce "union" peLabels when 89// their expanded-out sets are equal. 90// * HR (HVN with deReference---see paper): this will require that we 91// apply HVN until fixed point, which may need more bookkeeping of the 92// correspondence of main nodes to onodes. 93// * Location Equivalence (see paper): have points-to sets contain not 94// locations but location-equivalence class labels, each representing 95// a set of locations. 96// * HVN with field-sensitive ref: model each of the fields of a 97// pointer-to-struct. 98// 99// CROSS-CHECK 100// 101// To verify the soundness of the optimization, when the 102// debugHVNCrossCheck option is enabled, we run the solver twice, once 103// before and once after running HVN, dumping the solution to disk, and 104// then we compare the results. If they are not identical, the analysis 105// panics. 106// 107// The solution dumped to disk includes only the N*N submatrix of the 108// complete solution where N is the number of nodes after generation. 109// In other words, we ignore pointer variables and objects created by 110// the solver itself, since their numbering depends on the solver order, 111// which is affected by the optimization. In any case, that's the only 112// part the client cares about. 113// 114// The cross-check is too strict and may fail spuriously. Although the 115// H&L paper describing HVN states that the solutions obtained should be 116// identical, this is not the case in practice because HVN can collapse 117// cycles involving *p even when pts(p)={}. Consider this example 118// distilled from testdata/hello.go: 119// 120// var x T 121// func f(p **T) { 122// t0 = *p 123// ... 124// t1 = φ(t0, &x) 125// *p = t1 126// } 127// 128// If f is dead code, we get: 129// unoptimized: pts(p)={} pts(t0)={} pts(t1)={&x} 130// optimized: pts(p)={} pts(t0)=pts(t1)=pts(*p)={&x} 131// 132// It's hard to argue that this is a bug: the result is sound and the 133// loss of precision is inconsequential---f is dead code, after all. 134// But unfortunately it limits the usefulness of the cross-check since 135// failures must be carefully analyzed. Ben Hardekopf suggests (in 136// personal correspondence) some approaches to mitigating it: 137// 138// If there is a node with an HVN points-to set that is a superset 139// of the NORM points-to set, then either it's a bug or it's a 140// result of this issue. If it's a result of this issue, then in 141// the offline constraint graph there should be a REF node inside 142// some cycle that reaches this node, and in the NORM solution the 143// pointer being dereferenced by that REF node should be the empty 144// set. If that isn't true then this is a bug. If it is true, then 145// you can further check that in the NORM solution the "extra" 146// points-to info in the HVN solution does in fact come from that 147// purported cycle (if it doesn't, then this is still a bug). If 148// you're doing the further check then you'll need to do it for 149// each "extra" points-to element in the HVN points-to set. 150// 151// There are probably ways to optimize these checks by taking 152// advantage of graph properties. For example, extraneous points-to 153// info will flow through the graph and end up in many 154// nodes. Rather than checking every node with extra info, you 155// could probably work out the "origin point" of the extra info and 156// just check there. Note that the check in the first bullet is 157// looking for soundness bugs, while the check in the second bullet 158// is looking for precision bugs; depending on your needs, you may 159// care more about one than the other. 160// 161// which we should evaluate. The cross-check is nonetheless invaluable 162// for all but one of the programs in the pointer_test suite. 163 164import ( 165 "fmt" 166 "go/types" 167 "io" 168 "reflect" 169 170 "golang.org/x/tools/container/intsets" 171) 172 173// A peLabel is a pointer-equivalence label: two nodes with the same 174// peLabel have identical points-to solutions. 175// 176// The numbers are allocated consecutively like so: 177// 0 not a pointer 178// 1..N-1 addrConstraints (equals the constraint's .src field, hence sparse) 179// ... offsetAddr constraints 180// ... SCCs (with indirect nodes or multiple inputs) 181// 182// Each PE label denotes a set of pointers containing a single addr, a 183// single offsetAddr, or some set of other PE labels. 184// 185type peLabel int 186 187type hvn struct { 188 a *analysis 189 N int // len(a.nodes) immediately after constraint generation 190 log io.Writer // (optional) log of HVN lemmas 191 onodes []*onode // nodes of the offline graph 192 label peLabel // the next available PE label 193 hvnLabel map[string]peLabel // hash-value numbering (PE label) for each set of onodeids 194 stack []onodeid // DFS stack 195 index int32 // next onode.index, from Tarjan's SCC algorithm 196 197 // For each distinct offsetAddrConstraint (src, offset) pair, 198 // offsetAddrLabels records a unique PE label >= N. 199 offsetAddrLabels map[offsetAddr]peLabel 200} 201 202// The index of an node in the offline graph. 203// (Currently the first N align with the main nodes, 204// but this may change with HRU.) 205type onodeid uint32 206 207// An onode is a node in the offline constraint graph. 208// (Where ambiguous, members of analysis.nodes are referred to as 209// "main graph" nodes.) 210// 211// Edges in the offline constraint graph (edges and implicit) point to 212// the source, i.e. against the flow of values: they are dependencies. 213// Implicit edges are used for SCC computation, but not for gathering 214// incoming labels. 215// 216type onode struct { 217 rep onodeid // index of representative of SCC in offline constraint graph 218 219 edges intsets.Sparse // constraint edges X-->Y (this onode is X) 220 implicit intsets.Sparse // implicit edges *X-->*Y (this onode is X) 221 peLabels intsets.Sparse // set of peLabels are pointer-equivalent to this one 222 indirect bool // node has points-to relations not represented in graph 223 224 // Tarjan's SCC algorithm 225 index, lowlink int32 // Tarjan numbering 226 scc int32 // -ve => on stack; 0 => unvisited; +ve => node is root of a found SCC 227} 228 229type offsetAddr struct { 230 ptr nodeid 231 offset uint32 232} 233 234// nextLabel issues the next unused pointer-equivalence label. 235func (h *hvn) nextLabel() peLabel { 236 h.label++ 237 return h.label 238} 239 240// ref(X) returns the index of the onode for *X. 241func (h *hvn) ref(id onodeid) onodeid { 242 return id + onodeid(len(h.a.nodes)) 243} 244 245// hvn computes pointer-equivalence labels (peLabels) using the Hash-based 246// Value Numbering (HVN) algorithm described in Hardekopf & Lin, SAS'07. 247// 248func (a *analysis) hvn() { 249 start("HVN") 250 251 if a.log != nil { 252 fmt.Fprintf(a.log, "\n\n==== Pointer equivalence optimization\n\n") 253 } 254 255 h := hvn{ 256 a: a, 257 N: len(a.nodes), 258 log: a.log, 259 hvnLabel: make(map[string]peLabel), 260 offsetAddrLabels: make(map[offsetAddr]peLabel), 261 } 262 263 if h.log != nil { 264 fmt.Fprintf(h.log, "\nCreating offline graph nodes...\n") 265 } 266 267 // Create offline nodes. The first N nodes correspond to main 268 // graph nodes; the next N are their corresponding ref() nodes. 269 h.onodes = make([]*onode, 2*h.N) 270 for id := range a.nodes { 271 id := onodeid(id) 272 h.onodes[id] = &onode{} 273 h.onodes[h.ref(id)] = &onode{indirect: true} 274 } 275 276 // Each node initially represents just itself. 277 for id, o := range h.onodes { 278 o.rep = onodeid(id) 279 } 280 281 h.markIndirectNodes() 282 283 // Reserve the first N PE labels for addrConstraints. 284 h.label = peLabel(h.N) 285 286 // Add offline constraint edges. 287 if h.log != nil { 288 fmt.Fprintf(h.log, "\nAdding offline graph edges...\n") 289 } 290 for _, c := range a.constraints { 291 if debugHVNVerbose && h.log != nil { 292 fmt.Fprintf(h.log, "; %s\n", c) 293 } 294 c.presolve(&h) 295 } 296 297 // Find and collapse SCCs. 298 if h.log != nil { 299 fmt.Fprintf(h.log, "\nFinding SCCs...\n") 300 } 301 h.index = 1 302 for id, o := range h.onodes { 303 if id > 0 && o.index == 0 { 304 // Start depth-first search at each unvisited node. 305 h.visit(onodeid(id)) 306 } 307 } 308 309 // Dump the solution 310 // (NB: somewhat redundant with logging from simplify().) 311 if debugHVNVerbose && h.log != nil { 312 fmt.Fprintf(h.log, "\nPointer equivalences:\n") 313 for id, o := range h.onodes { 314 if id == 0 { 315 continue 316 } 317 if id == int(h.N) { 318 fmt.Fprintf(h.log, "---\n") 319 } 320 fmt.Fprintf(h.log, "o%d\t", id) 321 if o.rep != onodeid(id) { 322 fmt.Fprintf(h.log, "rep=o%d", o.rep) 323 } else { 324 fmt.Fprintf(h.log, "p%d", o.peLabels.Min()) 325 if o.indirect { 326 fmt.Fprint(h.log, " indirect") 327 } 328 } 329 fmt.Fprintln(h.log) 330 } 331 } 332 333 // Simplify the main constraint graph 334 h.simplify() 335 336 a.showCounts() 337 338 stop("HVN") 339} 340 341// ---- constraint-specific rules ---- 342 343// dst := &src 344func (c *addrConstraint) presolve(h *hvn) { 345 // Each object (src) is an initial PE label. 346 label := peLabel(c.src) // label < N 347 if debugHVNVerbose && h.log != nil { 348 // duplicate log messages are possible 349 fmt.Fprintf(h.log, "\tcreate p%d: {&n%d}\n", label, c.src) 350 } 351 odst := onodeid(c.dst) 352 osrc := onodeid(c.src) 353 354 // Assign dst this label. 355 h.onodes[odst].peLabels.Insert(int(label)) 356 if debugHVNVerbose && h.log != nil { 357 fmt.Fprintf(h.log, "\to%d has p%d\n", odst, label) 358 } 359 360 h.addImplicitEdge(h.ref(odst), osrc) // *dst ~~> src. 361} 362 363// dst = src 364func (c *copyConstraint) presolve(h *hvn) { 365 odst := onodeid(c.dst) 366 osrc := onodeid(c.src) 367 h.addEdge(odst, osrc) // dst --> src 368 h.addImplicitEdge(h.ref(odst), h.ref(osrc)) // *dst ~~> *src 369} 370 371// dst = *src + offset 372func (c *loadConstraint) presolve(h *hvn) { 373 odst := onodeid(c.dst) 374 osrc := onodeid(c.src) 375 if c.offset == 0 { 376 h.addEdge(odst, h.ref(osrc)) // dst --> *src 377 } else { 378 // We don't interpret load-with-offset, e.g. results 379 // of map value lookup, R-block of dynamic call, slice 380 // copy/append, reflection. 381 h.markIndirect(odst, "load with offset") 382 } 383} 384 385// *dst + offset = src 386func (c *storeConstraint) presolve(h *hvn) { 387 odst := onodeid(c.dst) 388 osrc := onodeid(c.src) 389 if c.offset == 0 { 390 h.onodes[h.ref(odst)].edges.Insert(int(osrc)) // *dst --> src 391 if debugHVNVerbose && h.log != nil { 392 fmt.Fprintf(h.log, "\to%d --> o%d\n", h.ref(odst), osrc) 393 } 394 } 395 // We don't interpret store-with-offset. 396 // See discussion of soundness at markIndirectNodes. 397} 398 399// dst = &src.offset 400func (c *offsetAddrConstraint) presolve(h *hvn) { 401 // Give each distinct (addr, offset) pair a fresh PE label. 402 // The cache performs CSE, effectively. 403 key := offsetAddr{c.src, c.offset} 404 label, ok := h.offsetAddrLabels[key] 405 if !ok { 406 label = h.nextLabel() 407 h.offsetAddrLabels[key] = label 408 if debugHVNVerbose && h.log != nil { 409 fmt.Fprintf(h.log, "\tcreate p%d: {&n%d.#%d}\n", 410 label, c.src, c.offset) 411 } 412 } 413 414 // Assign dst this label. 415 h.onodes[c.dst].peLabels.Insert(int(label)) 416 if debugHVNVerbose && h.log != nil { 417 fmt.Fprintf(h.log, "\to%d has p%d\n", c.dst, label) 418 } 419} 420 421// dst = src.(typ) where typ is an interface 422func (c *typeFilterConstraint) presolve(h *hvn) { 423 h.markIndirect(onodeid(c.dst), "typeFilter result") 424} 425 426// dst = src.(typ) where typ is concrete 427func (c *untagConstraint) presolve(h *hvn) { 428 odst := onodeid(c.dst) 429 for end := odst + onodeid(h.a.sizeof(c.typ)); odst < end; odst++ { 430 h.markIndirect(odst, "untag result") 431 } 432} 433 434// dst = src.method(c.params...) 435func (c *invokeConstraint) presolve(h *hvn) { 436 // All methods are address-taken functions, so 437 // their formal P-blocks were already marked indirect. 438 439 // Mark the caller's targets node as indirect. 440 sig := c.method.Type().(*types.Signature) 441 id := c.params 442 h.markIndirect(onodeid(c.params), "invoke targets node") 443 id++ 444 445 id += nodeid(h.a.sizeof(sig.Params())) 446 447 // Mark the caller's R-block as indirect. 448 end := id + nodeid(h.a.sizeof(sig.Results())) 449 for id < end { 450 h.markIndirect(onodeid(id), "invoke R-block") 451 id++ 452 } 453} 454 455// markIndirectNodes marks as indirect nodes whose points-to relations 456// are not entirely captured by the offline graph, including: 457// 458// (a) All address-taken nodes (including the following nodes within 459// the same object). This is described in the paper. 460// 461// The most subtle cause of indirect nodes is the generation of 462// store-with-offset constraints since the offline graph doesn't 463// represent them. A global audit of constraint generation reveals the 464// following uses of store-with-offset: 465// 466// (b) genDynamicCall, for P-blocks of dynamically called functions, 467// to which dynamic copy edges will be added to them during 468// solving: from storeConstraint for standalone functions, 469// and from invokeConstraint for methods. 470// All such P-blocks must be marked indirect. 471// (c) MakeUpdate, to update the value part of a map object. 472// All MakeMap objects's value parts must be marked indirect. 473// (d) copyElems, to update the destination array. 474// All array elements must be marked indirect. 475// 476// Not all indirect marking happens here. ref() nodes are marked 477// indirect at construction, and each constraint's presolve() method may 478// mark additional nodes. 479// 480func (h *hvn) markIndirectNodes() { 481 // (a) all address-taken nodes, plus all nodes following them 482 // within the same object, since these may be indirectly 483 // stored or address-taken. 484 for _, c := range h.a.constraints { 485 if c, ok := c.(*addrConstraint); ok { 486 start := h.a.enclosingObj(c.src) 487 end := start + nodeid(h.a.nodes[start].obj.size) 488 for id := c.src; id < end; id++ { 489 h.markIndirect(onodeid(id), "A-T object") 490 } 491 } 492 } 493 494 // (b) P-blocks of all address-taken functions. 495 for id := 0; id < h.N; id++ { 496 obj := h.a.nodes[id].obj 497 498 // TODO(adonovan): opt: if obj.cgn.fn is a method and 499 // obj.cgn is not its shared contour, this is an 500 // "inlined" static method call. We needn't consider it 501 // address-taken since no invokeConstraint will affect it. 502 503 if obj != nil && obj.flags&otFunction != 0 && h.a.atFuncs[obj.cgn.fn] { 504 // address-taken function 505 if debugHVNVerbose && h.log != nil { 506 fmt.Fprintf(h.log, "n%d is address-taken: %s\n", id, obj.cgn.fn) 507 } 508 h.markIndirect(onodeid(id), "A-T func identity") 509 id++ 510 sig := obj.cgn.fn.Signature 511 psize := h.a.sizeof(sig.Params()) 512 if sig.Recv() != nil { 513 psize += h.a.sizeof(sig.Recv().Type()) 514 } 515 for end := id + int(psize); id < end; id++ { 516 h.markIndirect(onodeid(id), "A-T func P-block") 517 } 518 id-- 519 continue 520 } 521 } 522 523 // (c) all map objects' value fields. 524 for _, id := range h.a.mapValues { 525 h.markIndirect(onodeid(id), "makemap.value") 526 } 527 528 // (d) all array element objects. 529 // TODO(adonovan): opt: can we do better? 530 for id := 0; id < h.N; id++ { 531 // Identity node for an object of array type? 532 if tArray, ok := h.a.nodes[id].typ.(*types.Array); ok { 533 // Mark the array element nodes indirect. 534 // (Skip past the identity field.) 535 for range h.a.flatten(tArray.Elem()) { 536 id++ 537 h.markIndirect(onodeid(id), "array elem") 538 } 539 } 540 } 541} 542 543func (h *hvn) markIndirect(oid onodeid, comment string) { 544 h.onodes[oid].indirect = true 545 if debugHVNVerbose && h.log != nil { 546 fmt.Fprintf(h.log, "\to%d is indirect: %s\n", oid, comment) 547 } 548} 549 550// Adds an edge dst-->src. 551// Note the unusual convention: edges are dependency (contraflow) edges. 552func (h *hvn) addEdge(odst, osrc onodeid) { 553 h.onodes[odst].edges.Insert(int(osrc)) 554 if debugHVNVerbose && h.log != nil { 555 fmt.Fprintf(h.log, "\to%d --> o%d\n", odst, osrc) 556 } 557} 558 559func (h *hvn) addImplicitEdge(odst, osrc onodeid) { 560 h.onodes[odst].implicit.Insert(int(osrc)) 561 if debugHVNVerbose && h.log != nil { 562 fmt.Fprintf(h.log, "\to%d ~~> o%d\n", odst, osrc) 563 } 564} 565 566// visit implements the depth-first search of Tarjan's SCC algorithm. 567// Precondition: x is canonical. 568func (h *hvn) visit(x onodeid) { 569 h.checkCanonical(x) 570 xo := h.onodes[x] 571 xo.index = h.index 572 xo.lowlink = h.index 573 h.index++ 574 575 h.stack = append(h.stack, x) // push 576 assert(xo.scc == 0, "node revisited") 577 xo.scc = -1 578 579 var deps []int 580 deps = xo.edges.AppendTo(deps) 581 deps = xo.implicit.AppendTo(deps) 582 583 for _, y := range deps { 584 // Loop invariant: x is canonical. 585 586 y := h.find(onodeid(y)) 587 588 if x == y { 589 continue // nodes already coalesced 590 } 591 592 xo := h.onodes[x] 593 yo := h.onodes[y] 594 595 switch { 596 case yo.scc > 0: 597 // y is already a collapsed SCC 598 599 case yo.scc < 0: 600 // y is on the stack, and thus in the current SCC. 601 if yo.index < xo.lowlink { 602 xo.lowlink = yo.index 603 } 604 605 default: 606 // y is unvisited; visit it now. 607 h.visit(y) 608 // Note: x and y are now non-canonical. 609 610 x = h.find(onodeid(x)) 611 612 if yo.lowlink < xo.lowlink { 613 xo.lowlink = yo.lowlink 614 } 615 } 616 } 617 h.checkCanonical(x) 618 619 // Is x the root of an SCC? 620 if xo.lowlink == xo.index { 621 // Coalesce all nodes in the SCC. 622 if debugHVNVerbose && h.log != nil { 623 fmt.Fprintf(h.log, "scc o%d\n", x) 624 } 625 for { 626 // Pop y from stack. 627 i := len(h.stack) - 1 628 y := h.stack[i] 629 h.stack = h.stack[:i] 630 631 h.checkCanonical(x) 632 xo := h.onodes[x] 633 h.checkCanonical(y) 634 yo := h.onodes[y] 635 636 if xo == yo { 637 // SCC is complete. 638 xo.scc = 1 639 h.labelSCC(x) 640 break 641 } 642 h.coalesce(x, y) 643 } 644 } 645} 646 647// Precondition: x is canonical. 648func (h *hvn) labelSCC(x onodeid) { 649 h.checkCanonical(x) 650 xo := h.onodes[x] 651 xpe := &xo.peLabels 652 653 // All indirect nodes get new labels. 654 if xo.indirect { 655 label := h.nextLabel() 656 if debugHVNVerbose && h.log != nil { 657 fmt.Fprintf(h.log, "\tcreate p%d: indirect SCC\n", label) 658 fmt.Fprintf(h.log, "\to%d has p%d\n", x, label) 659 } 660 661 // Remove pre-labeling, in case a direct pre-labeled node was 662 // merged with an indirect one. 663 xpe.Clear() 664 xpe.Insert(int(label)) 665 666 return 667 } 668 669 // Invariant: all peLabels sets are non-empty. 670 // Those that are logically empty contain zero as their sole element. 671 // No other sets contains zero. 672 673 // Find all labels coming in to the coalesced SCC node. 674 for _, y := range xo.edges.AppendTo(nil) { 675 y := h.find(onodeid(y)) 676 if y == x { 677 continue // already coalesced 678 } 679 ype := &h.onodes[y].peLabels 680 if debugHVNVerbose && h.log != nil { 681 fmt.Fprintf(h.log, "\tedge from o%d = %s\n", y, ype) 682 } 683 684 if ype.IsEmpty() { 685 if debugHVNVerbose && h.log != nil { 686 fmt.Fprintf(h.log, "\tnode has no PE label\n") 687 } 688 } 689 assert(!ype.IsEmpty(), "incoming node has no PE label") 690 691 if ype.Has(0) { 692 // {0} represents a non-pointer. 693 assert(ype.Len() == 1, "PE set contains {0, ...}") 694 } else { 695 xpe.UnionWith(ype) 696 } 697 } 698 699 switch xpe.Len() { 700 case 0: 701 // SCC has no incoming non-zero PE labels: it is a non-pointer. 702 xpe.Insert(0) 703 704 case 1: 705 // already a singleton 706 707 default: 708 // SCC has multiple incoming non-zero PE labels. 709 // Find the canonical label representing this set. 710 // We use String() as a fingerprint consistent with Equals(). 711 key := xpe.String() 712 label, ok := h.hvnLabel[key] 713 if !ok { 714 label = h.nextLabel() 715 if debugHVNVerbose && h.log != nil { 716 fmt.Fprintf(h.log, "\tcreate p%d: union %s\n", label, xpe.String()) 717 } 718 h.hvnLabel[key] = label 719 } 720 xpe.Clear() 721 xpe.Insert(int(label)) 722 } 723 724 if debugHVNVerbose && h.log != nil { 725 fmt.Fprintf(h.log, "\to%d has p%d\n", x, xpe.Min()) 726 } 727} 728 729// coalesce combines two nodes in the offline constraint graph. 730// Precondition: x and y are canonical. 731func (h *hvn) coalesce(x, y onodeid) { 732 xo := h.onodes[x] 733 yo := h.onodes[y] 734 735 // x becomes y's canonical representative. 736 yo.rep = x 737 738 if debugHVNVerbose && h.log != nil { 739 fmt.Fprintf(h.log, "\tcoalesce o%d into o%d\n", y, x) 740 } 741 742 // x accumulates y's edges. 743 xo.edges.UnionWith(&yo.edges) 744 yo.edges.Clear() 745 746 // x accumulates y's implicit edges. 747 xo.implicit.UnionWith(&yo.implicit) 748 yo.implicit.Clear() 749 750 // x accumulates y's pointer-equivalence labels. 751 xo.peLabels.UnionWith(&yo.peLabels) 752 yo.peLabels.Clear() 753 754 // x accumulates y's indirect flag. 755 if yo.indirect { 756 xo.indirect = true 757 } 758} 759 760// simplify computes a degenerate renumbering of nodeids from the PE 761// labels assigned by the hvn, and uses it to simplify the main 762// constraint graph, eliminating non-pointer nodes and duplicate 763// constraints. 764// 765func (h *hvn) simplify() { 766 // canon maps each peLabel to its canonical main node. 767 canon := make([]nodeid, h.label) 768 for i := range canon { 769 canon[i] = nodeid(h.N) // indicates "unset" 770 } 771 772 // mapping maps each main node index to the index of the canonical node. 773 mapping := make([]nodeid, len(h.a.nodes)) 774 775 for id := range h.a.nodes { 776 id := nodeid(id) 777 if id == 0 { 778 canon[0] = 0 779 mapping[0] = 0 780 continue 781 } 782 oid := h.find(onodeid(id)) 783 peLabels := &h.onodes[oid].peLabels 784 assert(peLabels.Len() == 1, "PE class is not a singleton") 785 label := peLabel(peLabels.Min()) 786 787 canonID := canon[label] 788 if canonID == nodeid(h.N) { 789 // id becomes the representative of the PE label. 790 canonID = id 791 canon[label] = canonID 792 793 if h.a.log != nil { 794 fmt.Fprintf(h.a.log, "\tpts(n%d) is canonical : \t(%s)\n", 795 id, h.a.nodes[id].typ) 796 } 797 798 } else { 799 // Link the solver states for the two nodes. 800 assert(h.a.nodes[canonID].solve != nil, "missing solver state") 801 h.a.nodes[id].solve = h.a.nodes[canonID].solve 802 803 if h.a.log != nil { 804 // TODO(adonovan): debug: reorganize the log so it prints 805 // one line: 806 // pe y = x1, ..., xn 807 // for each canonical y. Requires allocation. 808 fmt.Fprintf(h.a.log, "\tpts(n%d) = pts(n%d) : %s\n", 809 id, canonID, h.a.nodes[id].typ) 810 } 811 } 812 813 mapping[id] = canonID 814 } 815 816 // Renumber the constraints, eliminate duplicates, and eliminate 817 // any containing non-pointers (n0). 818 addrs := make(map[addrConstraint]bool) 819 copys := make(map[copyConstraint]bool) 820 loads := make(map[loadConstraint]bool) 821 stores := make(map[storeConstraint]bool) 822 offsetAddrs := make(map[offsetAddrConstraint]bool) 823 untags := make(map[untagConstraint]bool) 824 typeFilters := make(map[typeFilterConstraint]bool) 825 invokes := make(map[invokeConstraint]bool) 826 827 nbefore := len(h.a.constraints) 828 cc := h.a.constraints[:0] // in-situ compaction 829 for _, c := range h.a.constraints { 830 // Renumber. 831 switch c := c.(type) { 832 case *addrConstraint: 833 // Don't renumber c.src since it is the label of 834 // an addressable object and will appear in PT sets. 835 c.dst = mapping[c.dst] 836 default: 837 c.renumber(mapping) 838 } 839 840 if c.ptr() == 0 { 841 continue // skip: constraint attached to non-pointer 842 } 843 844 var dup bool 845 switch c := c.(type) { 846 case *addrConstraint: 847 _, dup = addrs[*c] 848 addrs[*c] = true 849 850 case *copyConstraint: 851 if c.src == c.dst { 852 continue // skip degenerate copies 853 } 854 if c.src == 0 { 855 continue // skip copy from non-pointer 856 } 857 _, dup = copys[*c] 858 copys[*c] = true 859 860 case *loadConstraint: 861 if c.src == 0 { 862 continue // skip load from non-pointer 863 } 864 _, dup = loads[*c] 865 loads[*c] = true 866 867 case *storeConstraint: 868 if c.src == 0 { 869 continue // skip store from non-pointer 870 } 871 _, dup = stores[*c] 872 stores[*c] = true 873 874 case *offsetAddrConstraint: 875 if c.src == 0 { 876 continue // skip offset from non-pointer 877 } 878 _, dup = offsetAddrs[*c] 879 offsetAddrs[*c] = true 880 881 case *untagConstraint: 882 if c.src == 0 { 883 continue // skip untag of non-pointer 884 } 885 _, dup = untags[*c] 886 untags[*c] = true 887 888 case *typeFilterConstraint: 889 if c.src == 0 { 890 continue // skip filter of non-pointer 891 } 892 _, dup = typeFilters[*c] 893 typeFilters[*c] = true 894 895 case *invokeConstraint: 896 if c.params == 0 { 897 panic("non-pointer invoke.params") 898 } 899 if c.iface == 0 { 900 continue // skip invoke on non-pointer 901 } 902 _, dup = invokes[*c] 903 invokes[*c] = true 904 905 default: 906 // We don't bother de-duping advanced constraints 907 // (e.g. reflection) since they are uncommon. 908 909 // Eliminate constraints containing non-pointer nodeids. 910 // 911 // We use reflection to find the fields to avoid 912 // adding yet another method to constraint. 913 // 914 // TODO(adonovan): experiment with a constraint 915 // method that returns a slice of pointers to 916 // nodeids fields to enable uniform iteration; 917 // the renumber() method could be removed and 918 // implemented using the new one. 919 // 920 // TODO(adonovan): opt: this is unsound since 921 // some constraints still have an effect if one 922 // of the operands is zero: rVCall, rVMapIndex, 923 // rvSetMapIndex. Handle them specially. 924 rtNodeid := reflect.TypeOf(nodeid(0)) 925 x := reflect.ValueOf(c).Elem() 926 for i, nf := 0, x.NumField(); i < nf; i++ { 927 f := x.Field(i) 928 if f.Type() == rtNodeid { 929 if f.Uint() == 0 { 930 dup = true // skip it 931 break 932 } 933 } 934 } 935 } 936 if dup { 937 continue // skip duplicates 938 } 939 940 cc = append(cc, c) 941 } 942 h.a.constraints = cc 943 944 if h.log != nil { 945 fmt.Fprintf(h.log, "#constraints: was %d, now %d\n", nbefore, len(h.a.constraints)) 946 } 947} 948 949// find returns the canonical onodeid for x. 950// (The onodes form a disjoint set forest.) 951func (h *hvn) find(x onodeid) onodeid { 952 // TODO(adonovan): opt: this is a CPU hotspot. Try "union by rank". 953 xo := h.onodes[x] 954 rep := xo.rep 955 if rep != x { 956 rep = h.find(rep) // simple path compression 957 xo.rep = rep 958 } 959 return rep 960} 961 962func (h *hvn) checkCanonical(x onodeid) { 963 if debugHVN { 964 assert(x == h.find(x), "not canonical") 965 } 966} 967 968func assert(p bool, msg string) { 969 if debugHVN && !p { 970 panic("assertion failed: " + msg) 971 } 972} 973