// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)

type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)

// applyRewrite repeatedly applies the block rewriter rb and the value rewriter rv
// to f until no more rewrites fire.
// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	var iters int
	var states map[string]bool
	for {
		change := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of its arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						change = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that an x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary.
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		if !change {
			break
		}
		iters++
		if iters > 1000 || debug >= 2 {
			// We've done a suspiciously large number of rewrites (or we're in debug mode).
			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
			// and the maximum value encountered during make.bash is 12.
			// Start checking for cycles. (This is too expensive to do routinely.)
			if states == nil {
				states = make(map[string]bool)
			}
			h := f.rewriteHash()
			if _, ok := states[h]; ok {
				// We've found a cycle.
				// To diagnose it, set debug to 2 and start again,
				// so that we'll print all rules applied until we complete another cycle.
				// If debug is already >= 2, we've already done that, so it's time to crash.
				if debug < 2 {
					debug = 2
					states = make(map[string]bool)
				} else {
					f.Fatalf("rewrite cycle detected")
				}
			}
			states[h] = true
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
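// For example (illustrative):
//
//	mergeSym(nil, s) == s
//	mergeSym(s, nil) == s
//	mergeSym(s1, s2)  // panics when both are non-nil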
func mergeSym(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

func canMergeSym(x, y Sym) bool {
	return x == nil || y == nil
}

// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	fn := sym.(*AuxCall).Fn
	return fn != nil && fn.String() == name
}

// canLoadUnaligned reports whether the architecture supports unaligned load operations.
func canLoadUnaligned(c *Config) bool {
	return c.ctxt.Arch.Alignment == 1
}

// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}

// logX returns logarithm of n base 2.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
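// For example (illustrative): log2uint32(8) == 3 and log2uint32(0) == -1.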
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(in) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(in) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1 << 19) <= n && n < (1<<19)
}

// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
	if x.Op != y.Op {
		return x.Op < y.Op
	}
	if !x.Pos.SameFileAndLine(y.Pos) {
		return x.Pos.Before(y.Pos)
	}
	return x.ID < y.ID
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

func auxIntToBool(i int64) bool {
	if i == 0 {
		return false
	}
	return true
}
func auxIntToInt8(i int64) int8 {
	return int8(i)
}
func auxIntToInt16(i int64) int16 {
	return int16(i)
}
func auxIntToInt32(i int64) int32 {
	return int32(i)
}
func auxIntToInt64(i int64) int64 {
	return i
}
func auxIntToUint8(i int64) uint8 {
	return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}

func auxIntToOp(cc int64) Op {
	return Op(cc)
}

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64 {
	return int64(i)
}
func int16ToAuxInt(i int16) int64 {
	return int64(i)
}
func int32ToAuxInt(i int32) int64 {
	return int64(i)
}
func int64ToAuxInt(i int64) int64 {
	return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
	return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
	return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
	return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}

func opToAuxInt(o Op) int64 {
	return int64(o)
}

// Aux is an interface to hold miscellaneous data in Blocks and Values.
type Aux interface {
	CanBeAnSSAAux()
}

// stringAux wraps string values for use in Aux.
type stringAux string

func (stringAux) CanBeAnSSAAux() {}

func auxToString(i Aux) string {
	return string(i.(stringAux))
}
func auxToSym(i Aux) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i Aux) *types.Type {
	return i.(*types.Type)
}
func auxToCall(i Aux) *AuxCall {
	return i.(*AuxCall)
}
func auxToS390xCCMask(i Aux) s390x.CCMask {
	return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i Aux) s390x.RotateParams {
	return i.(s390x.RotateParams)
}

func StringToAux(s string) Aux {
	return stringAux(s)
}
func symToAux(s Sym) Aux {
	return s
}
func callToAux(s *AuxCall) Aux {
	return s
}
func typeToAux(t *types.Type) Aux {
	return t
}
func s390xCCMaskToAux(c s390x.CCMask) Aux {
	return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
	return r
}

// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
	if lsym.Type != objabi.SRODATA {
		return nil
	}

	for _, r := range lsym.R {
		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
			return r.Sym
		}
	}

	return nil
}

// devirtLESym de-virtualizes an InterLECall.
// 'sym' is the symbol for the itab.
func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}

	lsym := loadLSymOffset(n, offset)
	if f := v.Block.Func; f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	return lsym
}

func devirtLECall(v *Value, sym *obj.LSym) *Value {
	v.Op = OpStaticLECall
	auxcall := v.Aux.(*AuxCall)
	auxcall.Fn = sym
	// Remove first arg
	v.Args[0].Uses--
	copy(v.Args[0:], v.Args[1:])
	v.Args[len(v.Args)-1] = nil // aid GC
	v.Args = v.Args[:len(v.Args)-1]
	return v
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
		// Checking for value equality only works after [z]cse has run.
		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
	case OpArg, OpArgIntReg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first value in a that we find by walking backwards.
	// if we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
}

// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//  A) make sure the values are really dead and never used again.
//  B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//	noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// flagArg extracts x from a pseudo-op like (LessThan x).
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual.
//
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF; the latter takes care of NaNs.
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64NotLessEqualF
	case OpARM64NotLessEqualF:
		return OpARM64LessEqualF
	case OpARM64GreaterThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64GreaterEqualF
	default:
		panic("unreachable")
	}
}

// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	case OpARM64NotLessThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64NotLessEqualF
	default:
		panic("unreachable")
	}
}

// ccARM64Eval evaluates an ARM64 op against a flags value
// that is potentially constant; it returns 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}

// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// isARMImmRot checks if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

// zeroUpper32Bits checks if a value zeroes out the upper 32 bits of a 64-bit register.
// depth limits recursion depth. In AMD64.rules 3 is used as the limit,
// because it catches the same number of cases as 4.
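// For example (illustrative), (MOVLQZX x) writes a zero-extended 32-bit
// value to a 64-bit register, so it reports true; a full 64-bit op does not.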
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpArg:
		return x.Type.Size() == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for the upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Size() == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for the upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Size() == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

// isPPC64WordRotateMask reports whether this value can be encoded as a mask for
// a rlwinm-like operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32-bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
	// Isolate rightmost 1 (if none 0) and add.
	v := uint32(v64)
	vp := (v & -v) + v
	// Likewise, for the wrapping case.
	vn := ^v
	vpn := (vn & -vn) + vn
	return (v&vp == 0 || vn&vpn == 0) && v != 0
}

// encodePPC64RotateMask compresses a mask and shift into a single value of the
// form me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	var mb, me, mbn, men int

	// Determine boundaries and then decode them
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic("Invalid PPC64 rotate mask")
	} else if nbits == 32 {
		mb = bits.LeadingZeros32(uint32(mask))
		me = 32 - bits.TrailingZeros32(uint32(mask))
		mbn = bits.LeadingZeros32(^uint32(mask))
		men = 32 - bits.TrailingZeros32(^uint32(mask))
	} else {
		mb = bits.LeadingZeros64(uint64(mask))
		me = 64 - bits.TrailingZeros64(uint64(mask))
		mbn = bits.LeadingZeros64(^uint64(mask))
		men = 64 - bits.TrailingZeros64(^uint64(mask))
	}
	// Check for a wrapping mask (e.g. bits at 0 and 63)
	if mb == 0 && me == int(nbits) {
		// swap the inverted values
		mb, me = men, mbn
	}

	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}

// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
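// For example (illustrative), decoding encodePPC64RotateMask(0, 0xF0, 32)
// yields rotate=0, mb=24, me=27 (inclusive) and mask=0xF0, i.e. MASK(24,27).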
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	auxint := uint64(sauxint)
	rotate = int64((auxint >> 16) & 0xFF)
	mb = int64((auxint >> 8) & 0xFF)
	me = int64((auxint >> 0) & 0xFF)
	nbits := int64((auxint >> 24) & 0xFF)
	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
	if mb > me {
		mask = ^mask
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match ISA definition. The second argument to MASK(..,me)
	// is inclusive.
	me = (me - 1) & (nbits - 1)
	return
}

// isPPC64ValidShiftMask verifies that the mask is a set of
// consecutive bits including the least significant bit.
func isPPC64ValidShiftMask(v int64) bool {
	if (v != 0) && ((v+1)&v) == 0 {
		return true
	}
	return false
}

func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

// mergePPC64RShiftMask decomposes a shift right into an equivalent rotate/mask,
// and returns mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	smask := uint64((1<<uint(nbits))-1) >> uint(s)
	return m & int64(smask)
}

// mergePPC64AndSrwi combines (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or returns 0.
func mergePPC64AndSrwi(m, s int64) int64 {
	mask := mergePPC64RShiftMask(m, s, 32)
	if !isPPC64WordRotateMask(mask) {
		return 0
	}
	return encodePPC64RotateMask((32-s)&31, mask, 32)
}

// mergePPC64ClrlsldiSrw tests if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// It returns the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
	// For CLRLSLDI, it's more convenient to think of it as masking left bits then rotating left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Rewrite mask to apply after the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))

	r_1 := 32 - srw
	r_2 := GetPPC64Shiftsh(sld)
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
		return 0
	}
	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}

// mergePPC64ClrlsldiRlwinm tests if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.
// It returns the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
	// For CLRLSLDI, it's more convenient to think of it as masking left bits then rotating left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Combine the masks, and adjust for the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
	r_2 := GetPPC64Shiftsh(int64(sld))
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
		return 0
	}
	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}

// mergePPC64SldiSrw computes the encoded RLWINM constant from combining
// (SLDconst [sld] (SRWconst [srw] x)), or returns 0 if they cannot be combined.
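// For example (illustrative), (SLDconst [3] (SRWconst [4] x)) keeps bits
// 4..31 of the low word and shifts them left by 3, which is a single
// RLWINM with rotate (32-4+3)&31 = 31 and mask 0x7FFFFFF8.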
func mergePPC64SldiSrw(sld, srw int64) int64 {
	if sld > srw || srw >= 32 {
		return 0
	}
	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
	mask := (mask_r & mask_l) << uint(sld)
	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}

// rotateLeft32 is a convenience function to rotate a 32-bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}

func rotateRight64(v, rotate int64) int64 {
	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
}

// armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || lsb+width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

// getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}

// isARM64BFMask checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall, OpStaticLECall:
				// A check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// TODO REGISTER ABI this needs to be cleaned up.
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			if v.Op == OpStaticLECall {
				// there is no store, yet.
				return true
			}
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		// See issue 29215.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
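// For example (illustrative), with lsym.P = []byte{1, 2} the remaining
// bytes read as zero, so read64(lsym, 0, binary.LittleEndian) == 0x0201.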
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}

// sequentialAddresses reports whether it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
// (because it does x + ^y + C).
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
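// For example (illustrative), addFlags32(math.MaxInt32, 1) sets V because
// the signed 32-bit addition wraps to a negative value.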
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//  - the results of the C flag are different
//  - the results of the V flag when y==minint are different

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
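// For example (illustrative), logicFlags64(-1) sets N and clears Z, so
// lt() reports true and eq() reports false.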
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}
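
// Illustrative sketch: the flag helpers above let rewrite rules fold
// flag-consuming ops at compile time. For instance, a rule matching
// (LessThan (CMPWconst [3] (MOVDconst [2]))) can evaluate
//
//	fc := subFlags32(2, 3) // flags of 2-3: N=1, Z=0, C=0, V=0
//	fc.lt()                // true, so the comparison folds to a constant 1
//
// ccARM64Eval performs this kind of evaluation for ARM64 condition ops.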