// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)

func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	for {
		change := false
		for _, b := range f.Blocks {
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
			}
			for j, v := range b.Values {
				change = phielimValue(v) || change

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that an x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					change = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}

				// apply rewrite function
				if rv(v) {
					change = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}
			}
		}
		if !change {
			break
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		if j != len(b.Values) {
			tail := b.Values[j:]
			for j := range tail {
				tail[j] = nil
			}
			b.Values = b.Values[:j]
		}
	}
}
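// An illustrative sketch of the copy elimination above (the values are
// hypothetical, and it assumes copySource follows the whole copy chain,
// as its use here implies): given
//	v1 = Const64 [1]
//	v2 = Copy v1
//	v3 = Copy v2
//	v4 = Add64 v3 x
// v4's argument is redirected straight to v1, the now-dead copies fall
// to zero uses and are reset to OpInvalid, and the "remove clobbered
// values" sweep compacts them out of b.Values.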
// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y interface{}) interface{} {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
}

func canMergeSym(x, y interface{}) bool {
	return x == nil || y == nil
}
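// For example (a hedged sketch; the ops and sym names are hypothetical):
// when folding an address computation such as
//	(ADDLconst [off] (LEAL {sym} base))
// into a single addressing op, at most one side may carry a symbol:
//	mergeSym(nil, sym) == sym
//	mergeSym(sym, nil) == sym
//	mergeSym(sym1, sym2) panics, so rules guard with canMergeSym first.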
// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	// 1) target is x's only use.
	// 2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//	load = read ... oldmem
	//	newmem = write ... oldmem
	//	arg0 = read ... newmem
	//	target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//	v1 = LOAD ...
			//	v2 = VARDEF
			//	v3 = LEAQ
			//	v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//	x = read ... v
			//	mem = write ... v
			//	load = read ... mem
			//	target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameSym reports whether sym is the same as the given named symbol.
func isSameSym(sym interface{}, name string) bool {
	s, ok := sym.(fmt.Stringer)
	return ok && s.String() == name
}

// nlz returns the number of leading zeros.
func nlz(x int64) int64 {
	return int64(bits.LeadingZeros64(uint64(x)))
}

// ntz returns the number of trailing zeros.
func ntz(x int64) int64 {
	return int64(bits.TrailingZeros64(uint64(x)))
}

// oneBit reports whether exactly one bit is set in x.
func oneBit(x int64) bool {
	return bits.OnesCount64(uint64(x)) == 1
}

// nlo returns the number of leading ones.
func nlo(x int64) int64 {
	return nlz(^x)
}

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return ntz(^x)
}

// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
// Rounds down.
func log2(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}
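// Worked examples for the bit helpers above:
//	nlz(1) == 63, nlz(-1) == 0 (an all-ones word has no leading zeros)
//	ntz(8) == 3, ntz(0) == 64
//	nto(7) == 3 (0b0111 has three trailing ones)
//	oneBit(0x40) == true, oneBit(0x41) == false
//	log2(8) == 3, log2(7) == 2, log2(0) == -1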
// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

// isPowerOfTwo reports whether n is a power of 2.
func isPowerOfTwo(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(in) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(in) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1 << 19) <= n && n < (1 << 19)
}

// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}
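// A worked example of the NaN path above (illustrative bit patterns;
// assumes isExactFloat32 admits NaN inputs, which the explicit NaN
// handling implies): the quiet NaN 0x7ff8000000000001 maps to
// 0x7fc00000. The sign bit and the top 23 mantissa bits survive, so the
// payload is preserved as far as float32 can represent it, which a bare
// float32(f) conversion would not guarantee.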
// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

// NeedsFixUp reports whether the division needs fix-up code.
func NeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
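// Note on the aux encoding above: float32 constants are widened to a
// float64 bit pattern before being stored, so the round trip
// auxTo32F(auxFrom32F(f)) reproduces f bit-for-bit, NaN payloads
// included; extend32Fto64F and truncate64Fto32F act as exact inverses
// on values that originated as float32.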
// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// devirt de-virtualizes an InterCall.
// 'sym' is the symbol for the itab.
func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
	f := v.Block.Func
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}
	lsym := f.fe.DerefItab(n, offset)
	if f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	return lsym
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
		// Checking for value equality only works after [z]cse has run.
		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

// isStackPtr reports whether v points into the stack.
func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return p2.Op == OpArg && p1.Args[0].Op == OpSP
	case OpArg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}
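// For example, copying a 16-byte region with 4-byte alignment on a
// 64-bit target proceeds in 4-byte units: moveSize(4, c) == 4 when
// c.PtrSize == 8, because 4%8 != 0 but 4%4 == 0.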
// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, b is the block of the first value in a found by
	// walking backward. If we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
}

// clobber invalidates v. Returns true.
// clobber is used by rewrite rules to:
// A) make sure v is really dead and never used again.
// B) decrement use counts of v's args.
func clobber(v *Value) bool {
	v.reset(OpInvalid)
	// Note: leave v.Block intact. The Block field is used after clobber.
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//	noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find functions
// that contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true, and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// flagArg extracts x from a flag pseudo-op like (LessThan x).
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example Equal -> NotEqual or LessThan -> GreaterEqual.
//
// TODO: add floating-point conditions
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterThanF:
		return OpARM64LessEqualF
	case OpARM64LessEqualF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64LessThanF
	default:
		panic("unreachable")
	}
}
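// The difference between arm64Negate above and arm64Invert below, by
// example: negation answers "when is the condition false", inversion
// answers "which condition gives the same result with swapped operands".
// For a comparison x < y:
//	arm64Negate: LessThan -> GreaterEqual, since !(x < y) == (x >= y)
//	arm64Invert: LessThan -> GreaterThan, since (x < y) == (y > x)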
// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x).
//
// TODO: add floating-point conditions
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	default:
		panic("unreachable")
	}
}

// ccARM64Eval evaluates an ARM64 condition code op against a flags value
// that is potentially constant. It returns 1 for true, -1 for false,
// and 0 for not constant.
func ccARM64Eval(cc interface{}, flags *Value) int {
	op := cc.(Op)
	fop := flags.Op
	switch fop {
	case OpARM64InvertFlags:
		return -ccARM64Eval(op, flags.Args[0])
	case OpARM64FlagEQ:
		switch op {
		case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
			OpARM64GreaterEqualU, OpARM64LessEqualU:
			return 1
		default:
			return -1
		}
	case OpARM64FlagLT_ULT:
		switch op {
		case OpARM64LessThan, OpARM64LessThanU,
			OpARM64LessEqual, OpARM64LessEqualU:
			return 1
		default:
			return -1
		}
	case OpARM64FlagLT_UGT:
		switch op {
		case OpARM64LessThan, OpARM64GreaterThanU,
			OpARM64LessEqual, OpARM64GreaterEqualU:
			return 1
		default:
			return -1
		}
	case OpARM64FlagGT_ULT:
		switch op {
		case OpARM64GreaterThan, OpARM64LessThanU,
			OpARM64GreaterEqual, OpARM64LessEqualU:
			return 1
		default:
			return -1
		}
	case OpARM64FlagGT_UGT:
		switch op {
		case OpARM64GreaterThan, OpARM64GreaterThanU,
			OpARM64GreaterEqual, OpARM64GreaterEqualU:
			return 1
		default:
			return -1
		}
	default:
		return 0
	}
}
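// For example (a sketch; the flags values are written by name for
// readability): evaluating the condition LessThan against constant flags:
//	flags = FlagLT_ULT    ->  1 (known true)
//	flags = FlagGT_UGT    -> -1 (known false)
//	flags = InvertFlags f -> the negation of evaluating against f
// A result of 0 means the flags are not known constant and no folding
// happens.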
// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}
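// Worked example: for c = 2.0 the biased float64 exponent field is 0x400
// and the mantissa field is zero, so 1/c has exponent 0x7fe-0x400 = 0x3fe
// (that is, 0.5) and a division by c can be rewritten as a multiplication.
// For c = 3.0 the mantissa field is nonzero, so 1/c is inexact and the
// rewrite must not fire.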
// isARMImmRot reports whether the immediate v can be directly encoded in
// an ARM instruction: an 8-bit value rotated right by an even number of bits.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

// areAdjacentOffsets reports whether the regions of size bytes at off1
// and off2 are adjacent, in either order.
func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

// zeroUpper32Bits reports whether x zeroes the upper 32 bits of a 64-bit
// register. depth limits the recursion depth; in AMD64.rules a limit of
// 3 is used because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
		return true
	case OpArg:
		return x.Type.Width == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking
		// visited values, just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for the upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Width == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking
		// visited values, just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for the upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Width == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking
		// visited values, just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "ppc64", "ppc64le", "arm64":
		return sz <= 8
	case "s390x":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

// armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the
// expected auxInt format.
func armBFAuxInt(lsb, width int64) int64 {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return width | lsb<<8
}

// getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
func getARM64BFlsb(bfc int64) int64 {
	return int64(uint64(bfc) >> 8)
}

// getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
func getARM64BFwidth(bfc int64) int64 {
	return bfc & 0xff
}

// isARM64BFMask reports whether mask >> rshift applied at lsb is a valid
// arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}
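// For example, a bitfield starting at bit 8 with width 16 encodes as
//	armBFAuxInt(8, 16) == 16 | 8<<8 == 0x810
// and decodes back with getARM64BFlsb(0x810) == 8 and
// getARM64BFwidth(0x810) == 16.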
// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// alignof returns the alignment of t in bytes.
// It will panic if t is not a *types.Type.
func alignof(t interface{}) int64 {
	return t.(*types.Type).Alignment()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
// It will panic if t is not a *types.Type.
func registerizable(b *Block, t interface{}) bool {
	typ := t.(*types.Type)
	if typ.IsPtrShaped() || typ.IsFloat() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym interface{}, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameSym(sym, "runtime.racefuncenter") && !isSameSym(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall:
				// The racefuncenter/racefuncexit calls themselves are
				// expected here and are fine to skip, as are calls to
				// the panic* helpers.
				s := v.Aux.(fmt.Stringer).String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any other call, we need to keep
				// racefunc* for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		// See issue 29215.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, bigEndian bool) uint16 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P))-1 || off < 0 {
		return 0
	}
	if bigEndian {
		return binary.BigEndian.Uint16(lsym.P[off:])
	}
	return binary.LittleEndian.Uint16(lsym.P[off:])
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, bigEndian bool) uint32 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P))-3 || off < 0 {
		return 0
	}
	if bigEndian {
		return binary.BigEndian.Uint32(lsym.P[off:])
	}
	return binary.LittleEndian.Uint32(lsym.P[off:])
}
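// Note on the bounds checks above: an out-of-range offset can be produced
// by rewrites of dead code, so the readers return 0 instead of panicking
// (see issue 29215). For example, read32 needs four bytes, so any off
// greater than len(lsym.P)-4 reports 0 and the bogus value is later
// discarded along with the dead code that produced it.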
// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, bigEndian bool) uint64 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P))-7 || off < 0 {
		return 0
	}
	if bigEndian {
		return binary.BigEndian.Uint64(lsym.P[off:])
	}
	return binary.LittleEndian.Uint64(lsym.P[off:])
}
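// For example (illustrative bytes): with lsym.P = []byte{1, 2, 3, 4, 5, 6, 7, 8},
//	read64(sym, 0, false) == 0x0807060504030201 (little-endian)
//	read64(sym, 0, true)  == 0x0102030405060708 (big-endian)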