// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/src"
	"fmt"
	"strings"
)

// A ZeroRegion records parts of an object which are known to be zero.
// A ZeroRegion only applies to a single memory state.
// Each bit in mask is set if the corresponding pointer-sized word of
// the base object is known to be zero.
// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
// is known to be zero.
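// For example, mask == 0x6 (binary 0110) means that words 1 and 2 of the base
// object are known to be zero, while nothing is known about the other words.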
type ZeroRegion struct {
	base *Value
	mask uint64
}

// needwb reports whether we need a write barrier for store op v.
// v must be Store/Move/Zero.
// zeroes provides known zero information (keyed by ID of memory-type values).
func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
	t, ok := v.Aux.(*types.Type)
	if !ok {
		v.Fatalf("store aux is not a type: %s", v.LongString())
	}
	if !t.HasHeapPointer() {
		return false
	}
	if IsStackAddr(v.Args[0]) {
		return false // write on stack doesn't need write barrier
	}
	if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) && IsNewObject(v.Args[0], v.MemoryArg()) {
		// Copying data from readonly memory into a fresh object doesn't need a write barrier.
		return false
	}
	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) {
		// Storing pointers to non-heap locations into zeroed memory doesn't need a write barrier.
		ptr := v.Args[0]
		var off int64
		size := v.Aux.(*types.Type).Size()
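		// Walk up any chain of OpOffPtr ops to find the base pointer
		// and the total constant offset of this write from it.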
		for ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		}
		ptrSize := v.Block.Func.Config.PtrSize
		if off%ptrSize != 0 || size%ptrSize != 0 {
			v.Fatalf("unaligned pointer write")
		}
		if off < 0 || off+size > 64*ptrSize {
			// write goes off end of tracked offsets
			return true
		}
		z := zeroes[v.MemoryArg().ID]
		if ptr != z.base {
			return true
		}
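		// Require every pointer-sized word in [off, off+size) to be recorded
		// as zero for the current memory state.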
		for i := off; i < off+size; i += ptrSize {
			if z.mask>>uint(i/ptrSize)&1 == 0 {
				return true // not known to be zero
			}
		}
		// All written locations are known to be zero - write barrier not needed.
		return false
	}
	return true
}

// writebarrier pass inserts write barriers for store ops (Store, Move, Zero)
// when necessary (the condition computed by needwb above). It rewrites store
// ops to branches and runtime calls, like
//
// if writeBarrier.enabled {
//   gcWriteBarrier(ptr, val)	// Not a regular Go call
// } else {
//   *ptr = val
// }
//
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
func writebarrier(f *Func) {
	if !f.fe.UseWriteBarrier() {
		return
	}

	var sb, sp, wbaddr, const0 *Value
	var typedmemmove, typedmemclr, gcWriteBarrier *obj.LSym
	var stores, after []*Value
	var sset *sparseSet
	var storeNumber []int32

	zeroes := f.computeZeroMap()
	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
		// First, identify all the stores that need a write barrier.
		// Mark them with WB ops temporarily and record how many there are.
		nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block
		for _, v := range b.Values {
			switch v.Op {
			case OpStore, OpMove, OpZero:
				if needwb(v, zeroes) {
					switch v.Op {
					case OpStore:
						v.Op = OpStoreWB
					case OpMove:
						v.Op = OpMoveWB
					case OpZero:
						v.Op = OpZeroWB
					}
					nWBops++
				}
			}
		}
		if nWBops == 0 {
			continue
		}

		if wbaddr == nil {
			// lazily initialize global values for write barrier test and calls
			// find SB and SP values in entry block
			initpos := f.Entry.Pos
			for _, v := range f.Entry.Values {
				if v.Op == OpSB {
					sb = v
				}
				if v.Op == OpSP {
					sp = v
				}
				if sb != nil && sp != nil {
					break
				}
			}
			if sb == nil {
				sb = f.Entry.NewValue0(initpos, OpSB, f.Config.Types.Uintptr)
			}
			if sp == nil {
				sp = f.Entry.NewValue0(initpos, OpSP, f.Config.Types.Uintptr)
			}
			wbsym := f.fe.Syslook("writeBarrier")
			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
			gcWriteBarrier = f.fe.Syslook("gcWriteBarrier")
			typedmemmove = f.fe.Syslook("typedmemmove")
			typedmemclr = f.fe.Syslook("typedmemclr")
			const0 = f.ConstInt32(f.Config.Types.UInt32, 0)

			// allocate auxiliary data structures for computing store order
			sset = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset)
			storeNumber = make([]int32, f.NumValues())
		}

		// order values in store order
		b.Values = storeOrder(b.Values, sset, storeNumber)

		firstSplit := true
	again:
		// find the start and end of the last contiguous WB store sequence.
		// a branch will be inserted there. values after it will be moved
		// to a new block.
		var last *Value
		var start, end int
		values := b.Values
	FindSeq:
		for i := len(values) - 1; i >= 0; i-- {
			w := values[i]
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				start = i
				if last == nil {
					last = w
					end = i + 1
				}
			case OpVarDef, OpVarLive, OpVarKill:
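				// Location markers don't interrupt a WB store sequence;
				// any that fall inside it are carried along and re-emitted
				// on both branches below.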
				continue
			default:
				if last == nil {
					continue
				}
				break FindSeq
			}
		}
		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
		after = append(after[:0], b.Values[end:]...)
		b.Values = b.Values[:start]

		// find the memory before the WB stores
		mem := stores[0].MemoryArg()
		pos := stores[0].Pos
		bThen := f.NewBlock(BlockPlain)
		bElse := f.NewBlock(BlockPlain)
		bEnd := f.NewBlock(b.Kind)
		bThen.Pos = pos
		bElse.Pos = pos
		bEnd.Pos = b.Pos
		b.Pos = pos

		// set up control flow for end block
		bEnd.CopyControls(b)
		bEnd.Likely = b.Likely
		for _, e := range b.Succs {
			bEnd.Succs = append(bEnd.Succs, e)
			e.b.Preds[e.i].b = bEnd
		}

		// set up control flow for write barrier test
		// load word, test word, avoiding partial register write from load byte.
		cfgtypes := &f.Config.Types
		flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem)
		flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0)
		b.Kind = BlockIf
		b.SetControl(flag)
		b.Likely = BranchUnlikely
		b.Succs = b.Succs[:0]
		b.AddEdgeTo(bThen)
		b.AddEdgeTo(bElse)
		// TODO: For OpStoreWB and the buffered write barrier,
		// we could move the write out of the write barrier,
		// which would lead to fewer branches. We could do
		// something similar to OpZeroWB, since the runtime
		// could provide just the barrier half and then we
		// could unconditionally do an OpZero (which could
		// also generate better zeroing code). OpMoveWB is
		// trickier and would require changing how
		// cgoCheckMemmove works.
		bThen.AddEdgeTo(bEnd)
		bElse.AddEdgeTo(bEnd)

		// for each write barrier store, append write barrier version to bThen
		// and simple store version to bElse
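		// memThen and memElse track the memory state along the barrier
		// and non-barrier paths, respectively; they are merged with a
		// memory Phi in bEnd once all stores have been emitted.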
		memThen := mem
		memElse := mem

		// If the source of a MoveWB is volatile (will be clobbered by a
		// function call), we need to copy it to a temporary location, as
		// marshaling the args of typedmemmove might clobber the value we're
		// trying to move.
		// Look for volatile source, copy it to temporary before we emit any
		// call.
		// It is unlikely to have more than one of them. Just do a linear
		// search instead of using a map.
		type volatileCopy struct {
			src *Value // address of original volatile value
			tmp *Value // address of temporary we've copied the volatile value into
		}
		var volatiles []volatileCopy
	copyLoop:
		for _, w := range stores {
			if w.Op == OpMoveWB {
				val := w.Args[1]
				if isVolatile(val) {
					for _, c := range volatiles {
						if val == c.src {
							continue copyLoop // already copied
						}
					}

					t := val.Type.Elem()
					tmp := f.fe.Auto(w.Pos, t)
					memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen)
					tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen)
					siz := t.Size()
					memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen)
					memThen.Aux = t
					volatiles = append(volatiles, volatileCopy{val, tmpaddr})
				}
			}
		}

		for _, w := range stores {
			ptr := w.Args[0]
			pos := w.Pos

			var fn *obj.LSym
			var typ *obj.LSym
			var val *Value
			switch w.Op {
			case OpStoreWB:
				val = w.Args[1]
				nWBops--
			case OpMoveWB:
				fn = typedmemmove
				val = w.Args[1]
				typ = w.Aux.(*types.Type).Symbol()
				nWBops--
			case OpZeroWB:
				fn = typedmemclr
				typ = w.Aux.(*types.Type).Symbol()
				nWBops--
			case OpVarDef, OpVarLive, OpVarKill:
			}

			// then block: emit write barrier call
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				if w.Op == OpStoreWB {
					memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
				} else {
					srcval := val
					if w.Op == OpMoveWB && isVolatile(srcval) {
						for _, c := range volatiles {
							if srcval == c.src {
								srcval = c.tmp
								break
							}
						}
					}
					memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb)
				}
				// Note that we set up a writebarrier function call.
				f.fe.SetWBPos(pos)
			case OpVarDef, OpVarLive, OpVarKill:
				memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen)
			}

			// else block: normal store
			switch w.Op {
			case OpStoreWB:
				memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse)
			case OpMoveWB:
				memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse)
				memElse.Aux = w.Aux
			case OpZeroWB:
				memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse)
				memElse.Aux = w.Aux
			case OpVarDef, OpVarLive, OpVarKill:
				memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse)
			}
		}

		// mark volatile temps dead
		for _, c := range volatiles {
			tmpNode := c.tmp.Aux
			memThen = bThen.NewValue1A(memThen.Pos, OpVarKill, types.TypeMem, tmpNode, memThen)
		}

		// merge memory
		// Splice memory Phi into the last memory of the original sequence,
		// which may be used in subsequent blocks. Other memories in the
		// sequence must be dead after this block since there can be only
		// one memory live.
		bEnd.Values = append(bEnd.Values, last)
		last.Block = bEnd
		last.reset(OpPhi)
		last.Type = types.TypeMem
		last.AddArg(memThen)
		last.AddArg(memElse)
		for _, w := range stores {
			if w != last {
				w.resetArgs()
			}
		}
		for _, w := range stores {
			if w != last {
				f.freeValue(w)
			}
		}

		// put values after the store sequence into the end block
		bEnd.Values = append(bEnd.Values, after...)
		for _, w := range after {
			w.Block = bEnd
		}

		// Preemption is unsafe between loading the write
		// barrier-enabled flag and performing the write
		// because that would allow a GC phase transition,
		// which would invalidate the flag. Remember the
		// conditional block so liveness analysis can disable
		// safe-points. This is somewhat subtle because we're
		// splitting b bottom-up.
		if firstSplit {
			// Add b itself.
			b.Func.WBLoads = append(b.Func.WBLoads, b)
			firstSplit = false
		} else {
			// We've already split b, so we just pushed a
			// write barrier test into bEnd.
			b.Func.WBLoads = append(b.Func.WBLoads, bEnd)
		}

		// if we have more stores in this block, do this block again
		if nWBops > 0 {
			goto again
		}
	}
}

// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap() map[ID]ZeroRegion {
	ptrSize := f.Config.PtrSize
	// Keep track of which parts of memory are known to be zero.
	// This helps with removing write barriers for various initialization patterns.
	// This analysis is conservative. We only keep track, for each memory state, of
	// which of the first 64 words of a single object are known to be zero.
	zeroes := map[ID]ZeroRegion{}
	// Find new objects.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpLoad {
				continue
			}
			mem := v.MemoryArg()
			if IsNewObject(v, mem) {
				nptr := v.Type.Elem().Size() / ptrSize
				if nptr > 64 {
					nptr = 64
				}
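				// Record that the first nptr words of the fresh object are zero
				// in the memory state produced by the runtime.newobject call.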
				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
			}
		}
	}
	// Find stores to those new objects.
	for {
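		// Propagate zero information forward across stores until no map
		// entry changes (a simple fixed-point iteration).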
		changed := false
		for _, b := range f.Blocks {
			// Note: iterating forwards helps convergence, as values are
			// typically (but not always!) in store order.
			for _, v := range b.Values {
				if v.Op != OpStore {
					continue
				}
				z, ok := zeroes[v.MemoryArg().ID]
				if !ok {
					continue
				}
				ptr := v.Args[0]
				var off int64
				size := v.Aux.(*types.Type).Size()
				for ptr.Op == OpOffPtr {
					off += ptr.AuxInt
					ptr = ptr.Args[0]
				}
				if ptr != z.base {
					// Different base object - we don't know anything.
					// We could even be writing to the base object we know
					// about, but through an aliased but offset pointer.
					// So we have to throw all the zero information we have away.
					continue
				}
				// Round to cover any partially written pointer slots.
				// Pointer writes should never be unaligned like this, but non-pointer
				// writes to pointer-containing types will do this.
				if d := off % ptrSize; d != 0 {
					off -= d
					size += d
				}
				if d := size % ptrSize; d != 0 {
					size += ptrSize - d
				}
				// Clip to the 64 words that we track.
				min := off
				max := off + size
				if min < 0 {
					min = 0
				}
				if max > 64*ptrSize {
					max = 64 * ptrSize
				}
				// Clear bits for parts that we are writing (and hence
				// will no longer necessarily be zero).
				for i := min; i < max; i += ptrSize {
					bit := i / ptrSize
					z.mask &^= 1 << uint(bit)
				}
				if z.mask == 0 {
					// No more known zeros - don't bother keeping.
					continue
				}
				// Save updated known zero contents for new store.
				if zeroes[v.ID] != z {
					zeroes[v.ID] = z
					changed = true
				}
			}
		}
		if !changed {
			break
		}
	}
	if f.pass.debug > 0 {
		fmt.Printf("func %s\n", f.Name)
		for mem, z := range zeroes {
			fmt.Printf("  memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
		}
	}
	return zeroes
}

// wbcall emits a write barrier runtime call in b and returns the resulting memory.
func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value {
	config := b.Func.Config

	// put arguments on stack
	off := config.ctxt.FixedFrameSize()

	if typ != nil { // for typedmemmove
		taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
		off = round(off, taddr.Type.Alignment())
		arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem)
		off += taddr.Type.Size()
	}

	off = round(off, ptr.Type.Alignment())
	arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp)
	mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem)
	off += ptr.Type.Size()

	if val != nil {
		off = round(off, val.Type.Alignment())
		arg = b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem)
		off += val.Type.Size()
	}
	off = round(off, config.PtrSize)

	// issue call
	mem = b.NewValue1A(pos, OpStaticCall, types.TypeMem, fn, mem)
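	// AuxInt records the size of the argument area marshaled above.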
	mem.AuxInt = off - config.ctxt.FixedFrameSize()
	return mem
}

// round rounds o up to a multiple of r; r must be a power of 2.
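// For example, round(10, 8) == 16 and round(16, 8) == 16.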
func round(o int64, r int64) int64 {
	return (o + r - 1) &^ (r - 1)
}

// IsStackAddr reports whether v is known to be an address of a stack slot.
func IsStackAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr:
		return true
	}
	return false
}

// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
func IsGlobalAddr(v *Value) bool {
	if v.Op == OpAddr && v.Args[0].Op == OpSB {
		return true // address of a global
	}
	if v.Op == OpConstNil {
		return true
	}
	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
		return true // loading from a read-only global - the resulting address can't be a heap address.
	}
	return false
}

// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
	if v.Op == OpConstNil {
		// Nil pointers are read only. See issue 33438.
		return true
	}
	// See TODO in OpAddr case in IsSanitizerSafeAddr below.
	if v.Op == OpAddr && strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`) {
		return true
	}
	return false
}

// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object at memory state mem.
func IsNewObject(v *Value, mem *Value) bool {
	if v.Op != OpLoad {
		return false
	}
	if v.MemoryArg() != mem {
		return false
	}
	if mem.Op != OpStaticCall {
		return false
	}
	if !isSameSym(mem.Aux, "runtime.newobject") {
		return false
	}
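	// The allocated pointer is read back from runtime.newobject's
	// return-value slot: an OffPtr off SP at the fixed return offset.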
	if v.Args[0].Op != OpOffPtr {
		return false
	}
	if v.Args[0].Args[0].Op != OpSP {
		return false
	}
	c := v.Block.Func.Config
	if v.Args[0].AuxInt != c.ctxt.FixedFrameSize()+c.RegSize { // offset of return value
		return false
	}
	return true
}

// IsSanitizerSafeAddr reports whether v is known to be an address
// that doesn't need instrumentation.
func IsSanitizerSafeAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr:
		// Stack addresses are always safe.
		return true
	case OpITab, OpStringPtr, OpGetClosurePtr:
		// Itabs, string data, and closure fields are
		// read-only once initialized.
		return true
	case OpAddr:
		sym := v.Aux.(*obj.LSym)
		// TODO(mdempsky): Find a cleaner way to
		// detect this. It would be nice if we could
		// test sym.Type==objabi.SRODATA, but we don't
		// initialize sym.Type until after function
		// compilation.
		if strings.HasPrefix(sym.Name, `""..stmp_`) {
			return true
		}
	}
	return false
}

// isVolatile reports whether v is a pointer to the argument region on the stack,
// which will be clobbered by a function call.
func isVolatile(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	return v.Op == OpSP
}