// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/reflectdata"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
)

// A ZeroRegion records parts of an object which are known to be zero.
// A ZeroRegion only applies to a single memory state.
// Each bit in mask is set if the corresponding pointer-sized word of
// the base object is known to be zero.
// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
// is known to be zero.
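// For example, on a 64-bit target (ptrSize == 8), mask == 0b110 means that
// [base+8, base+24) is known to be zero, while nothing is known about [base, base+8).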
type ZeroRegion struct {
	base *Value
	mask uint64
}

// needwb reports whether we need a write barrier for store op v.
// v must be Store/Move/Zero.
// zeroes provides known zero information (keyed by ID of memory-type values).
func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
	t, ok := v.Aux.(*types.Type)
	if !ok {
		v.Fatalf("store aux is not a type: %s", v.LongString())
	}
	if !t.HasPointers() {
		return false
	}
	if IsStackAddr(v.Args[0]) {
		return false // write on stack doesn't need write barrier
	}
	if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) {
		if mem, ok := IsNewObject(v.Args[0]); ok && mem == v.MemoryArg() {
			// Copying data from readonly memory into a fresh object doesn't need a write barrier.
			return false
		}
	}
	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) {
		// Storing pointers to non-heap locations into zeroed memory doesn't need a write barrier.
		ptr := v.Args[0]
		var off int64
		size := v.Aux.(*types.Type).Size()
		for ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		}
		ptrSize := v.Block.Func.Config.PtrSize
		if off%ptrSize != 0 || size%ptrSize != 0 {
			v.Fatalf("unaligned pointer write")
		}
		if off < 0 || off+size > 64*ptrSize {
			// write goes off end of tracked offsets
			return true
		}
		z := zeroes[v.MemoryArg().ID]
		if ptr != z.base {
			return true
		}
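		// Check that every pointer-sized word in [off, off+size) is still
		// marked as zero in the region's mask.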
		for i := off; i < off+size; i += ptrSize {
			if z.mask>>uint(i/ptrSize)&1 == 0 {
				return true // not known to be zero
			}
		}
		// All written locations are known to be zero - write barrier not needed.
		return false
	}
	return true
}

// writebarrier pass inserts write barriers for store ops (Store, Move, Zero)
// when necessary (the condition above). It rewrites store ops to branches
// and runtime calls, like
//
// if writeBarrier.enabled {
//   gcWriteBarrier(ptr, val)	// Not a regular Go call
// } else {
//   *ptr = val
// }
//
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
func writebarrier(f *Func) {
	if !f.fe.UseWriteBarrier() {
		return
	}

	var sb, sp, wbaddr, const0 *Value
	var typedmemmove, typedmemclr, gcWriteBarrier *obj.LSym
	var stores, after []*Value
	var sset *sparseSet
	var storeNumber []int32

	zeroes := f.computeZeroMap()
	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
		// first, identify all the stores that need a write barrier.
		// mark them with WB ops temporarily. record presence of WB ops.
		nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block
		for _, v := range b.Values {
			switch v.Op {
			case OpStore, OpMove, OpZero:
				if needwb(v, zeroes) {
					switch v.Op {
					case OpStore:
						v.Op = OpStoreWB
					case OpMove:
						v.Op = OpMoveWB
					case OpZero:
						v.Op = OpZeroWB
					}
					nWBops++
				}
			}
		}
		if nWBops == 0 {
			continue
		}

		if wbaddr == nil {
			// lazily initialize global values for write barrier test and calls
			// find SB and SP values in entry block
			initpos := f.Entry.Pos
			sp, sb = f.spSb()
			wbsym := f.fe.Syslook("writeBarrier")
			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
			gcWriteBarrier = f.fe.Syslook("gcWriteBarrier")
			typedmemmove = f.fe.Syslook("typedmemmove")
			typedmemclr = f.fe.Syslook("typedmemclr")
			const0 = f.ConstInt32(f.Config.Types.UInt32, 0)

			// allocate auxiliary data structures for computing store order
			sset = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset)
			storeNumber = make([]int32, f.NumValues())
		}

		// order values in store order
		b.Values = storeOrder(b.Values, sset, storeNumber)

		firstSplit := true
	again:
		// find the start and end of the last contiguous WB store sequence.
		// a branch will be inserted there. values after it will be moved
		// to a new block.
		var last *Value
		var start, end int
		values := b.Values
	FindSeq:
		for i := len(values) - 1; i >= 0; i-- {
			w := values[i]
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				start = i
				if last == nil {
					last = w
					end = i + 1
				}
			case OpVarDef, OpVarLive, OpVarKill:
				continue
			default:
				if last == nil {
					continue
				}
				break FindSeq
			}
		}
		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
		after = append(after[:0], b.Values[end:]...)
		b.Values = b.Values[:start]

		// find the memory before the WB stores
		mem := stores[0].MemoryArg()
		pos := stores[0].Pos
		bThen := f.NewBlock(BlockPlain)
		bElse := f.NewBlock(BlockPlain)
		bEnd := f.NewBlock(b.Kind)
		bThen.Pos = pos
		bElse.Pos = pos
		bEnd.Pos = b.Pos
		b.Pos = pos

		// set up control flow for end block
		bEnd.CopyControls(b)
		bEnd.Likely = b.Likely
		for _, e := range b.Succs {
			bEnd.Succs = append(bEnd.Succs, e)
			e.b.Preds[e.i].b = bEnd
		}

		// set up control flow for write barrier test
		// load word, test word, avoiding partial register write from load byte.
		cfgtypes := &f.Config.Types
		flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem)
		flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0)
		b.Kind = BlockIf
		b.SetControl(flag)
		b.Likely = BranchUnlikely
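		// The barrier branch is marked unlikely: the writeBarrier.enabled flag
		// is off except while the garbage collector needs barriers.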
		b.Succs = b.Succs[:0]
		b.AddEdgeTo(bThen)
		b.AddEdgeTo(bElse)
		// TODO: For OpStoreWB and the buffered write barrier,
		// we could move the write out of the write barrier,
		// which would lead to fewer branches. We could do
		// something similar to OpZeroWB, since the runtime
		// could provide just the barrier half and then we
		// could unconditionally do an OpZero (which could
		// also generate better zeroing code). OpMoveWB is
		// trickier and would require changing how
		// cgoCheckMemmove works.
		bThen.AddEdgeTo(bEnd)
		bElse.AddEdgeTo(bEnd)

		// for each write barrier store, append write barrier version to bThen
		// and simple store version to bElse
		memThen := mem
		memElse := mem

		// If the source of a MoveWB is volatile (will be clobbered by a
		// function call), we need to copy it to a temporary location, as
		// marshaling the args of typedmemmove might clobber the value we're
		// trying to move.
		// Look for volatile source, copy it to temporary before we emit any
		// call.
		// There is unlikely to be more than one of them, so just do a linear
		// search instead of using a map.
		type volatileCopy struct {
			src *Value // address of original volatile value
			tmp *Value // address of temporary we've copied the volatile value into
		}
		var volatiles []volatileCopy
	copyLoop:
		for _, w := range stores {
			if w.Op == OpMoveWB {
				val := w.Args[1]
				if isVolatile(val) {
					for _, c := range volatiles {
						if val == c.src {
							continue copyLoop // already copied
						}
					}

					t := val.Type.Elem()
					tmp := f.fe.Auto(w.Pos, t)
					memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen)
					tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen)
					siz := t.Size()
					memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen)
					memThen.Aux = t
					volatiles = append(volatiles, volatileCopy{val, tmpaddr})
				}
			}
		}

		for _, w := range stores {
			ptr := w.Args[0]
			pos := w.Pos

			var fn *obj.LSym
			var typ *obj.LSym
			var val *Value
			switch w.Op {
			case OpStoreWB:
				val = w.Args[1]
				nWBops--
			case OpMoveWB:
				fn = typedmemmove
				val = w.Args[1]
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpZeroWB:
				fn = typedmemclr
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpVarDef, OpVarLive, OpVarKill:
			}

			// then block: emit write barrier call
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				if w.Op == OpStoreWB {
					memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
				} else {
					srcval := val
					if w.Op == OpMoveWB && isVolatile(srcval) {
						for _, c := range volatiles {
							if srcval == c.src {
								srcval = c.tmp
								break
							}
						}
					}
					memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb)
				}
				// Note that we set up a writebarrier function call.
				f.fe.SetWBPos(pos)
			case OpVarDef, OpVarLive, OpVarKill:
				memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen)
			}

			// else block: normal store
			switch w.Op {
			case OpStoreWB:
				memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse)
			case OpMoveWB:
				memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse)
				memElse.Aux = w.Aux
			case OpZeroWB:
				memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse)
				memElse.Aux = w.Aux
			case OpVarDef, OpVarLive, OpVarKill:
				memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse)
			}
		}

		// mark volatile temps dead
		for _, c := range volatiles {
			tmpNode := c.tmp.Aux
			memThen = bThen.NewValue1A(memThen.Pos, OpVarKill, types.TypeMem, tmpNode, memThen)
		}

		// merge memory
		// Splice memory Phi into the last memory of the original sequence,
		// which may be used in subsequent blocks. Other memories in the
		// sequence must be dead after this block since there can be only
		// one memory live.
		bEnd.Values = append(bEnd.Values, last)
		last.Block = bEnd
		last.reset(OpPhi)
		last.Pos = last.Pos.WithNotStmt()
		last.Type = types.TypeMem
		last.AddArg(memThen)
		last.AddArg(memElse)
		for _, w := range stores {
			if w != last {
				w.resetArgs()
			}
		}
		for _, w := range stores {
			if w != last {
				f.freeValue(w)
			}
		}

		// put values after the store sequence into the end block
		bEnd.Values = append(bEnd.Values, after...)
		for _, w := range after {
			w.Block = bEnd
		}

		// Preemption is unsafe between loading the write
		// barrier-enabled flag and performing the write
		// because that would allow a GC phase transition,
		// which would invalidate the flag. Remember the
		// conditional block so liveness analysis can disable
		// safe-points. This is somewhat subtle because we're
		// splitting b bottom-up.
		if firstSplit {
			// Add b itself.
			b.Func.WBLoads = append(b.Func.WBLoads, b)
			firstSplit = false
		} else {
			// We've already split b, so we just pushed a
			// write barrier test into bEnd.
			b.Func.WBLoads = append(b.Func.WBLoads, bEnd)
		}

		// if we have more stores in this block, do this block again
		if nWBops > 0 {
			goto again
		}
	}
}

// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap() map[ID]ZeroRegion {
	ptrSize := f.Config.PtrSize
	// Keep track of which parts of memory are known to be zero.
	// This helps with removing write barriers for various initialization patterns.
	// This analysis is conservative. We only keep track, for each memory state, of
	// which of the first 64 words of a single object are known to be zero.
	zeroes := map[ID]ZeroRegion{}
	// Find new objects.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if mem, ok := IsNewObject(v); ok {
				nptr := v.Type.Elem().Size() / ptrSize
				if nptr > 64 {
					nptr = 64
				}
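				// The new object starts fully zeroed: one mask bit per pointer-sized
				// word, so e.g. an object of three words gets mask 0b111.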
				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
			}
		}
	}
	// Find stores to those new objects.
	for {
		changed := false
		for _, b := range f.Blocks {
			// Note: iterating forwards helps convergence, as values are
			// typically (but not always!) in store order.
			for _, v := range b.Values {
				if v.Op != OpStore {
					continue
				}
				z, ok := zeroes[v.MemoryArg().ID]
				if !ok {
					continue
				}
				ptr := v.Args[0]
				var off int64
				size := v.Aux.(*types.Type).Size()
				for ptr.Op == OpOffPtr {
					off += ptr.AuxInt
					ptr = ptr.Args[0]
				}
				if ptr != z.base {
					// Different base object - we don't know anything.
					// We could even be writing to the base object we know
					// about, but through an aliased but offset pointer.
					// So we have to throw all the zero information we have away.
					continue
				}
				// Round to cover any partially written pointer slots.
				// Pointer writes should never be unaligned like this, but non-pointer
				// writes to pointer-containing types will do this.
				if d := off % ptrSize; d != 0 {
					off -= d
					size += d
				}
				if d := size % ptrSize; d != 0 {
					size += ptrSize - d
				}
				// Clip to the 64 words that we track.
				min := off
				max := off + size
				if min < 0 {
					min = 0
				}
				if max > 64*ptrSize {
					max = 64 * ptrSize
				}
				// Clear bits for parts that we are writing (and hence
				// will no longer necessarily be zero).
				for i := min; i < max; i += ptrSize {
					bit := i / ptrSize
					z.mask &^= 1 << uint(bit)
				}
				if z.mask == 0 {
					// No more known zeros - don't bother keeping.
					continue
				}
				// Save updated known zero contents for new store.
				if zeroes[v.ID] != z {
					zeroes[v.ID] = z
					changed = true
				}
			}
		}
		if !changed {
			break
		}
	}
	if f.pass.debug > 0 {
		fmt.Printf("func %s\n", f.Name)
		for mem, z := range zeroes {
			fmt.Printf("  memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
		}
	}
	return zeroes
}

// wbcall emits a write barrier runtime call in b and returns the resulting memory.
func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value {
	config := b.Func.Config

	var wbargs []*Value
	// TODO (register args) this is a bit of a hack.
	inRegs := b.Func.ABIDefault == b.Func.ABI1 && len(config.intParamRegs) >= 3
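	// When inRegs is true, the (up to three) arguments are passed in registers
	// under the register ABI; otherwise they are stored to the stack argument
	// area below.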

	// put arguments on stack
	off := config.ctxt.FixedFrameSize()

	var argTypes []*types.Type
	if typ != nil { // for typedmemmove
		taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
		argTypes = append(argTypes, b.Func.Config.Types.Uintptr)
		off = round(off, taddr.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, taddr)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem)
		}
		off += taddr.Type.Size()
	}

	argTypes = append(argTypes, ptr.Type)
	off = round(off, ptr.Type.Alignment())
	if inRegs {
		wbargs = append(wbargs, ptr)
	} else {
		arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem)
	}
	off += ptr.Type.Size()

	if val != nil {
		argTypes = append(argTypes, val.Type)
		off = round(off, val.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, val)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem)
		}
		off += val.Type.Size()
	}
	off = round(off, config.PtrSize)
	wbargs = append(wbargs, mem)

	// issue call
	call := b.NewValue0A(pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(fn, b.Func.ABIDefault.ABIAnalyzeTypes(nil, argTypes, nil)))
	call.AddArgs(wbargs...)
	call.AuxInt = off - config.ctxt.FixedFrameSize()
	return b.NewValue1I(pos, OpSelectN, types.TypeMem, 0, call)
}

// round rounds o up to a multiple of r; r must be a power of 2.
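// For example, round(13, 8) == 16 and round(16, 8) == 16.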
func round(o int64, r int64) int64 {
	return (o + r - 1) &^ (r - 1)
}

// IsStackAddr reports whether v is known to be an address of a stack slot.
func IsStackAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr, OpGetCallerSP:
		return true
	}
	return false
}

// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
func IsGlobalAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	if v.Op == OpAddr && v.Args[0].Op == OpSB {
		return true // address of a global
	}
	if v.Op == OpConstNil {
		return true
	}
	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
		return true // loading from a read-only global - the resulting address can't be a heap address.
	}
	return false
}

// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
	if v.Op == OpConstNil {
		// Nil pointers are read only. See issue 33438.
		return true
	}
	if v.Op == OpAddr && v.Aux.(*obj.LSym).Type == objabi.SRODATA {
		return true
	}
	return false
}

// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object.
// If so, it also returns the memory state mem at which v is zero.
func IsNewObject(v *Value) (mem *Value, ok bool) {
	f := v.Block.Func
	c := f.Config
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Op != OpSelectN || v.AuxInt != 0 {
			return nil, false
		}
		// Find the memory
		for _, w := range v.Block.Values {
			if w.Op == OpSelectN && w.AuxInt == 1 && w.Args[0] == v.Args[0] {
				mem = w
				break
			}
		}
		if mem == nil {
			return nil, false
		}
	} else {
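		// Without a register ABI, the result of runtime.newobject is returned on
		// the stack, so v must be a Load from the call's stack result slot.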
		if v.Op != OpLoad {
			return nil, false
		}
		mem = v.MemoryArg()
		if mem.Op != OpSelectN {
			return nil, false
		}
		if mem.Type != types.TypeMem {
			return nil, false
		} // assume it is the right selection if true
	}
	call := mem.Args[0]
	if call.Op != OpStaticCall {
		return nil, false
	}
	if !isSameCall(call.Aux, "runtime.newobject") {
		return nil, false
	}
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Args[0] == call {
			return mem, true
		}
		return nil, false
	}
	if v.Args[0].Op != OpOffPtr {
		return nil, false
	}
	if v.Args[0].Args[0].Op != OpSP {
		return nil, false
	}
	if v.Args[0].AuxInt != c.ctxt.FixedFrameSize()+c.RegSize { // offset of return value
		return nil, false
	}
	return mem, true
}

// IsSanitizerSafeAddr reports whether v is known to be an address
// that doesn't need instrumentation.
func IsSanitizerSafeAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr:
		// Stack addresses are always safe.
		return true
	case OpITab, OpStringPtr, OpGetClosurePtr:
		// Itabs, string data, and closure fields are
		// read-only once initialized.
		return true
	case OpAddr:
		return v.Aux.(*obj.LSym).Type == objabi.SRODATA || v.Aux.(*obj.LSym).Type == objabi.SLIBFUZZER_EXTRA_COUNTER
	}
	return false
}

// isVolatile reports whether v is a pointer to the argument region on the stack,
// which will be clobbered by a function call.
func isVolatile(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy || v.Op == OpSelectNAddr {
		v = v.Args[0]
	}
	return v.Op == OpSP
}