1// Inferno utils/6l/pass.c
2// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
3//
4//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
5//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6//	Portions Copyright © 1997-1999 Vita Nuova Limited
7//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8//	Portions Copyright © 2004,2006 Bruce Ellis
9//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11//	Portions Copyright © 2009 The Go Authors. All rights reserved.
12//
13// Permission is hereby granted, free of charge, to any person obtaining a copy
14// of this software and associated documentation files (the "Software"), to deal
15// in the Software without restriction, including without limitation the rights
16// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17// copies of the Software, and to permit persons to whom the Software is
18// furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included in
21// all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29// THE SOFTWARE.
30
31package x86
32
33import (
34	"cmd/internal/obj"
35	"cmd/internal/objabi"
36	"cmd/internal/src"
37	"cmd/internal/sys"
38	"log"
39	"math"
40	"path"
41	"strings"
42)
43
44func CanUse1InsnTLS(ctxt *obj.Link) bool {
45	if isAndroid {
46		// Android uses a global variable for the tls offset.
47		return false
48	}
49
50	if ctxt.Arch.Family == sys.I386 {
51		switch ctxt.Headtype {
52		case objabi.Hlinux,
53			objabi.Hplan9,
54			objabi.Hwindows:
55			return false
56		}
57
58		return true
59	}
60
61	switch ctxt.Headtype {
62	case objabi.Hplan9, objabi.Hwindows:
63		return false
64	case objabi.Hlinux, objabi.Hfreebsd:
65		return !ctxt.Flag_shared
66	}
67
68	return true
69}
70
// progedit rewrites the single instruction p in place into the form the rest
// of the x86 back end expects. In order, it:
//
//   - converts TLS accesses between the 1-instruction and 2-instruction forms
//     according to CanUse1InsnTLS, then applies the Android and the
//     Windows/AMD64 and Plan 9 TLS adjustments;
//   - turns a bare 0 in the To operand of CMPPD/CMPPS/CMPSD/CMPSS into $0;
//   - marks CALL/JMP/RET to a symbol as TYPE_BRANCH;
//   - turns MOVL/MOVQ of an address into LEAL/LEAQ where applicable;
//   - replaces floating-point constant operands with references to symbols
//     obtained from ctxt.Float32Sym/ctxt.Float64Sym (or, for MOVSS/MOVSD of
//     +0 into an X register, with XORPS);
//   - applies rewriteToUseGot when ctxt.Flag_dynlink is set and
//     rewriteToPcrel when ctxt.Flag_shared is set on 386.
//
// newprog allocates any instruction that has to be inserted after p.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only works because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		// A source operand of the form off(reg)(TLS*1) becomes off(TLS).
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		// Likewise for a destination operand of the form off(reg)(TLS*1).
		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			// q is the second instruction of the pair; it copies p's
			// source (keeping its offset) and indexes it by TLS.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			// p itself becomes the plain "MOVx TLS, reg" load.
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// Android uses a tls offset determined at runtime. Rewrite
	//	MOVQ TLS, BX
	// to
	//	MOVQ runtime.tls_g(SB), BX
	if isAndroid && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Reg = REG_NONE
		p.From.Sym = ctxt.Lookup("runtime.tls_g")
		p.From.Index = REG_NONE
	}

	// On Windows/AMD64 and on Plan 9, a TLS index with scale 1 is changed
	// to scale 2.
	// TODO: Remove.
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	// On AMD64 this applies to every address operand; on other families
	// addresses of NAME_EXTERN and NAME_STATIC symbols are left alone.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					// Leave the switch: nothing is left to move to memory.
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		// Single-precision operations: the constant is narrowed to float32
		// and replaced by a reference to the symbol holding that value.
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					// Leave the switch: nothing is left to move to memory.
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		// Double-precision operations: the constant is replaced by a
		// reference to the symbol holding the float64 value.
		if p.From.Type == obj.TYPE_FCONST {
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	// With -dynlink, global data is reached through the GOT.
	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	// With -shared on 386, symbol references are made PC-relative.
	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}
297
// rewriteToUseGot rewrites p, if necessary, to access global data via the
// global offset table. progedit calls it when ctxt.Flag_dynlink is set.
//
// The register that receives the address loaded from the GOT is R15 on
// AMD64 and CX on 386, except that for a 386 LEAL whose destination register
// is neither the base nor the index of its source operand the destination
// register itself is used. Inserted instructions are allocated with newprog
// and appended after p; when p is replaced by a new sequence, p itself is
// handed to obj.Nopout.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// lea and mov are the pointer-sized LEA/MOV opcodes for the target;
	// reg is the scratch register for the GOT-loaded address.
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $LEA $offset($reg), $reg
		//     CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so).
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
		} else {
			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
		}
		// Save the offset into the duff routine before p.To is overwritten.
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		// Remember the original opcode and destination so they can be
		// reapplied to the trailing instruction in the complex case.
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		// q tracks the last instruction of the sequence built so far.
		q := p
		if p.From.Offset != 0 {
			// The GOT entry holds the symbol address only; add the
			// offset with a separate LEA on the loaded register.
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			// Finish with the original operation, now taking its
			// source from the scratch register.
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// source is the operand of p (From or To) that names a non-local
	// NAME_EXTERN symbol; if neither does, nothing more needs rewriting.
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		//
		// We disable open-coded defers in buildssa() on 386 ONLY with shared
		// libraries because of this extra code added before deferreturn calls.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		// p1: LEAL _GLOBAL_OFFSET_TABLE_, BX
		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		// p2: a copy of the original call.
		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		if p.RestArgs != nil {
			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
		}
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		obj.Nopout(p)
		return

	}
	// These instructions reference symbols but are not data accesses, so
	// they are left untouched.
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	// p1: $MOV sym@GOT, reg
	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	// p2: the original instruction with the symbol operand replaced by a
	// reference through reg (any offset on the operand is kept).
	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	obj.Nopout(p)
}
490
491func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
492	// RegTo2 is set on the instructions we insert here so they don't get
493	// processed twice.
494	if p.RegTo2 != 0 {
495		return
496	}
497	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
498		return
499	}
500	// Any Prog (aside from the above special cases) with an Addr with Name ==
501	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
502	// inserted before it.
503	isName := func(a *obj.Addr) bool {
504		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
505			return false
506		}
507		if a.Sym.Type == objabi.STLSBSS {
508			return false
509		}
510		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
511	}
512
513	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
514		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
515		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
516		// respectively.
517		if p.To.Type != obj.TYPE_REG {
518			q := obj.Appendp(p, newprog)
519			q.As = p.As
520			q.From.Type = obj.TYPE_REG
521			q.From.Reg = REG_CX
522			q.To = p.To
523			p.As = AMOVL
524			p.To.Type = obj.TYPE_REG
525			p.To.Reg = REG_CX
526			p.To.Sym = nil
527			p.To.Name = obj.NAME_NONE
528		}
529	}
530
531	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
532		return
533	}
534	var dst int16 = REG_CX
535	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
536		dst = p.To.Reg
537		// Why? See the comment near the top of rewriteToUseGot above.
538		// AMOVLs might be introduced by the GOT rewrites.
539	}
540	q := obj.Appendp(p, newprog)
541	q.RegTo2 = 1
542	r := obj.Appendp(q, newprog)
543	r.RegTo2 = 1
544	q.As = obj.ACALL
545	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
546	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
547	q.To.Type = obj.TYPE_MEM
548	q.To.Name = obj.NAME_EXTERN
549	r.As = p.As
550	r.Scond = p.Scond
551	r.From = p.From
552	r.RestArgs = p.RestArgs
553	r.Reg = p.Reg
554	r.To = p.To
555	if isName(&p.From) {
556		r.From.Reg = dst
557	}
558	if isName(&p.To) {
559		r.To.Reg = dst
560	}
561	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
562		r.GetFrom3().Reg = dst
563	}
564	obj.Nopout(p)
565}
566
// Bit flags for Prog.mark.
const (
	// markBit is used in errorCheck to avoid duplicate work.
	markBit = 1 << iota
)
571
// preprocess expands the function whose TEXT instruction is
// cursym.Func().Text. It returns immediately if there is no TEXT instruction
// or nothing follows it. Otherwise it:
//
//   - records the argument and frame sizes in cursym.Func().Args and Locals;
//   - on AMD64, decides whether to reserve a slot for saving the base pointer;
//   - on AMD64, marks small functions whose only calls are duffcopy, duffzero
//     or the runtime calls accepted by isZeroArgRuntimeCall as NOSPLIT;
//   - emits the prologue: the stack split check (or just the G load for a
//     NOSPLIT wrapper), the SP adjustment, the BP save, and for wrappers the
//     g._panic.argp fixup;
//   - walks all instructions, adjusting NAME_AUTO and NAME_PARAM offsets,
//     recording Spadj for PUSH/POP/ADJSP, and expanding each RET into the
//     epilogue.
//
// newprog allocates every inserted instruction.
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
		return
	}

	p := cursym.Func().Text
	// The frame size comes from the TEXT instruction; negative values are
	// treated as zero for the frame computations below.
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	// hasCall records whether the function contains a CALL, DUFFCOPY or
	// DUFFZERO; it feeds rule (3) below.
	hasCall := false
	for q := p; q != nil; q = q.Link {
		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
			hasCall = true
			break
		}
	}

	var bpsize int
	if ctxt.Arch.Family == sys.AMD64 &&
		!p.From.Sym.NoFrame() && // (1) below
		!(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
		!(autoffset == 0 && !hasCall) { // (3) below
		// Make room to save a base pointer.
		// There are 2 cases we must avoid:
		// 1) If noframe is set (which we do for functions which tail call).
		// 2) Scary runtime internals which would be all messed up by frame pointers.
		//    We detect these using a heuristic: frameless nosplit functions.
		//    TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
		// For performance, we also want to avoid:
		// 3) Frameless leaf functions
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
	}

	// The argument size is carried in the TEXT instruction's To.Val.
	textarg := int64(p.To.Val.(int32))
	cursym.Func().Args = int32(textarg)
	cursym.Func().Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
		cursym.Func().Locals = 0
	}

	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
	if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
		// Look for a reason not to treat this small-frame function as a
		// leaf; if none is found it is marked NOSPLIT.
		leaf := true
	LeafSearch:
		for q := p; q != nil; q = q.Link {
			switch q.As {
			case obj.ACALL:
				// Treat common runtime calls that take no arguments
				// the same as duffcopy and duffzero.
				if !isZeroArgRuntimeCall(q.To.Sym) {
					leaf = false
					break LeafSearch
				}
				fallthrough
			case obj.ADUFFCOPY, obj.ADUFFZERO:
				if autoffset >= objabi.StackSmall-8 {
					leaf = false
					break LeafSearch
				}
			}
		}

		if leaf {
			p.From.Sym.Set(obj.AttrNoSplit, true)
		}
	}

	// Scratch registers used by the wrapper argp fixup below.
	var regEntryTmp0, regEntryTmp1 int16
	if ctxt.Arch.Family == sys.AMD64 {
		regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
	} else {
		regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
	}

	// regg is the register holding g after the split check or G load. It
	// stays zero when neither branch below runs.
	var regg int16
	if !p.From.Sym.NoSplit() {
		// Emit split check and load G register
		p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
	} else if p.From.Sym.Wrapper() {
		// Load G register for the wrapper code
		p, regg = loadG(ctxt, cursym, p, newprog)
	}

	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
	// TODO: are there other cases (e.g., wrapper functions) that need marking?
	markedPrologue := false

	if autoffset != 0 {
		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
			ctxt.Diag("unaligned stack size %d", autoffset)
		}
		// ADJSP $autoffset
		p = obj.Appendp(p, newprog)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(autoffset)
		p.Spadj = autoffset
		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
		markedPrologue = true
	}

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(p, newprog)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_SP
		p.To.Scale = 1
		p.To.Offset = int64(autoffset) - int64(bpsize)
		if !markedPrologue {
			p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
		}

		// Move current frame to BP
		p = obj.Appendp(p, newprog)

		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Scale = 1
		p.From.Offset = int64(autoffset) - int64(bpsize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if cursym.Func().Text.From.Sym.Wrapper() {
		// if g._panic != nil && g._panic.argp == FP {
		//   g._panic.argp = bottom-of-frame
		// }
		//
		//	MOVQ g_panic(g), regEntryTmp0
		//	TESTQ regEntryTmp0, regEntryTmp0
		//	JNE checkargp
		// end:
		//	NOP
		//  ... rest of function ...
		// checkargp:
		//	LEAQ (autoffset+8)(SP), regEntryTmp1
		//	CMPQ panic_argp(regEntryTmp0), regEntryTmp1
		//	JNE end
		//  MOVQ SP, panic_argp(regEntryTmp0)
		//  JMP end
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
		//
		// The layout is chosen to help static branch prediction:
		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
		//
		// On 386 the L-suffixed forms of these instructions are used instead.

		// MOVQ g_panic(g), regEntryTmp0
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = regg
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp0
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// TESTQ regEntryTmp0, regEntryTmp0
		p = obj.Appendp(p, newprog)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = regEntryTmp0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp0
		if ctxt.Arch.Family == sys.I386 {
			p.As = ATESTL
		}

		// JNE checkargp (checkargp to be resolved later)
		jne := obj.Appendp(p, newprog)
		jne.As = AJNE
		jne.To.Type = obj.TYPE_BRANCH

		// end:
		//  NOP
		end := obj.Appendp(jne, newprog)
		end.As = obj.ANOP

		// Fast forward to end of function.
		var last *obj.Prog
		for last = end; last.Link != nil; last = last.Link {
		}

		// LEAQ (autoffset+8)(SP), regEntryTmp1
		p = obj.Appendp(last, newprog)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp1
		if ctxt.Arch.Family == sys.I386 {
			p.As = ALEAL
		}

		// Set jne branch target.
		jne.To.SetTarget(p)

		// CMPQ panic_argp(regEntryTmp0), regEntryTmp1
		p = obj.Appendp(p, newprog)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = regEntryTmp0
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp1
		if ctxt.Arch.Family == sys.I386 {
			p.As = ACMPL
		}

		// JNE end
		p = obj.Appendp(p, newprog)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(end)

		// MOVQ SP, panic_argp(regEntryTmp0)
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = regEntryTmp0
		p.To.Offset = 0 // Panic.argp
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// JMP end
		p = obj.Appendp(p, newprog)
		p.As = obj.AJMP
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(end)

		// Reset p for following code.
		p = end
	}

	// Walk the whole function from the TEXT instruction. deltasp is the
	// running sum of SP adjustments seen so far (PUSH/POP/ADJSP, including
	// the prologue ADJSP inserted above).
	var deltasp int32
	for p = cursym.Func().Text; p != nil; p = p.Link {
		// Offsets of autos are shifted by deltasp less the BP slot;
		// offsets of params by deltasp plus pcsize (one register slot).
		pcsize := ctxt.Arch.RegSize
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.GetFrom3() != nil {
			switch p.GetFrom3().Name {
			case obj.NAME_AUTO:
				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
			case obj.NAME_PARAM:
				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
			}
		}
		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		// Every case except RET ends with continue; only RET reaches the
		// epilogue code after the switch.
		switch p.As {
		default:
			// Any other instruction that writes SP (compares excluded)
			// flags the function as SPWRITE. That is only accepted in
			// assembly; otherwise it is a fatal error.
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
				f := cursym.Func()
				if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 {
					f.FuncFlag |= objabi.FuncFlag_SPWRITE
					if ctxt.Debugvlog || !ctxt.IsAsm {
						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
						if !ctxt.IsAsm {
							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
							ctxt.DiagFlush()
							log.Fatalf("bad SPWRITE")
						}
					}
				}
			}
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case AADJSP:
			p.Spadj = int32(p.From.Offset)
			deltasp += int32(p.From.Offset)
			continue

		case obj.ARET:
			// do nothing
		}

		// At a RET the accumulated SP adjustment must equal the frame size.
		if autoffset != deltasp {
			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
		}

		if autoffset != 0 {
			// Expand RET into [MOVQ saved-BP, BP;] ADJSP $-autoffset; RET,
			// reusing p for the first instruction of the sequence.
			to := p.To // Keep To attached to RET for retjmp below
			p.To = obj.Addr{}
			if bpsize > 0 {
				// Restore caller's BP
				p.As = AMOVQ

				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_SP
				p.From.Scale = 1
				p.From.Offset = int64(autoffset) - int64(bpsize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p = obj.Appendp(p, newprog)
			}

			p.As = AADJSP
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = int64(-autoffset)
			p.Spadj = -autoffset
			p = obj.Appendp(p, newprog)
			p.As = obj.ARET
			p.To = to

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		// A RET that names a symbol is emitted as a JMP to that symbol.
		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}
945
946func isZeroArgRuntimeCall(s *obj.LSym) bool {
947	if s == nil {
948		return false
949	}
950	switch s.Name {
951	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
952		return true
953	}
954	if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
955		// These functions do take arguments (in registers),
956		// but use no stack before they do a stack check. We
957		// should include them. See issue 31219.
958		return true
959	}
960	return false
961}
962
963func indir_cx(ctxt *obj.Link, a *obj.Addr) {
964	a.Type = obj.TYPE_MEM
965	a.Reg = REG_CX
966}
967
968// loadG ensures the G is loaded into a register (either CX or REGG),
969// appending instructions to p if necessary. It returns the new last
970// instruction and the G register.
971func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
972	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
973		// Use the G register directly in ABIInternal
974		return p, REGG
975	}
976
977	var regg int16 = REG_CX
978	if ctxt.Arch.Family == sys.AMD64 {
979		regg = REGG // == REG_R14
980	}
981
982	p = obj.Appendp(p, newprog)
983	p.As = AMOVQ
984	if ctxt.Arch.PtrSize == 4 {
985		p.As = AMOVL
986	}
987	p.From.Type = obj.TYPE_MEM
988	p.From.Reg = REG_TLS
989	p.From.Offset = 0
990	p.To.Type = obj.TYPE_REG
991	p.To.Reg = regg
992
993	// Rewrite TLS instruction if necessary.
994	next := p.Link
995	progedit(ctxt, p, newprog)
996	for p.Link != next {
997		p = p.Link
998		progedit(ctxt, p, newprog)
999	}
1000
1001	if p.From.Index == REG_TLS {
1002		p.From.Scale = 2
1003	}
1004
1005	return p, regg
1006}
1007
// stacksplit appends the stack-split check for cursym after p.
// It appends to (does not overwrite) p.
//
// The G register is obtained here by calling loadG; callers do not need to
// have loaded it. SP (lowered by framesize-StackSmall when framesize exceeds
// objabi.StackSmall) is then compared with the stack guard word located at
// offset 2*PtrSize from g, or 3*PtrSize when cursym is a CFunc. The
// out-of-line morestack call sequence is appended after the last instruction
// of the function and finishes with a jump back to the instruction that
// follows startPred.
//
// If ctxt.Flag_maymorestack is non-empty, a call to that symbol is emitted
// before the check.
//
// textarg is not used by this function.
//
// Returns the Prog produced by EndUnsafePoint after the conditional jump
// (the last new instruction on the inline path) and the G register.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) {
	// Default to the 64-bit opcodes; the I386 variants are substituted below.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		// Register arguments are spilled before the call and reloaded
		// after it.
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// Save the context register around the call when the function
		// needs it.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// startPred is the instruction preceding the check; the jump emitted at
	// the end of this function targets startPred.Link.
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is only set in the huge-frame case, where it holds the extra JCS
	// that must also branch to the morestack call.
	var q1 *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > objabi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - objabi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch to the morestack call; its target is filled in
	// once the call sequence has been emitted below.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Walk to the final instruction of the function; the morestack call
	// sequence is appended after it.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Pick the morestack entry point: morestackc for CFunc symbols,
	// morestack_noctxt when no context register is needed, and plain
	// morestack otherwise.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	pcdata = cursym.Func().UnspillRegisterArgs(callend, newprog)
	pcdata = ctxt.EndUnsafePoint(pcdata, newprog, -1)

	// After morestack returns, jump back to the instruction following
	// startPred and undo the SP adjustment recorded on spfix.
	jmp := obj.Appendp(pcdata, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Both conditional branches enter the out-of-line sequence at spill.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end, rg
}
1227
1228func isR15(r int16) bool {
1229	return r == REG_R15 || r == REG_R15B
1230}
1231func addrMentionsR15(a *obj.Addr) bool {
1232	if a == nil {
1233		return false
1234	}
1235	return isR15(a.Reg) || isR15(a.Index)
1236}
1237func progMentionsR15(p *obj.Prog) bool {
1238	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
1239}
1240
1241// progOverwritesR15 reports whether p writes to R15 and does not depend on
1242// the previous value of R15.
1243func progOverwritesR15(p *obj.Prog) bool {
1244	if !(p.To.Type == obj.TYPE_REG && isR15(p.To.Reg)) {
1245		// Not writing to R15.
1246		return false
1247	}
1248	if (p.As == AXORL || p.As == AXORQ) && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
1249		// These look like uses of R15, but aren't, so we must detect these
1250		// before the use check below.
1251		return true
1252	}
1253	if addrMentionsR15(&p.From) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3()) {
1254		// use before overwrite
1255		return false
1256	}
1257	if p.As == AMOVL || p.As == AMOVQ || p.As == APOPQ {
1258		return true
1259		// TODO: MOVB might be ok if we only ever use R15B.
1260	}
1261	return false
1262}
1263
1264func addrUsesGlobal(a *obj.Addr) bool {
1265	if a == nil {
1266		return false
1267	}
1268	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
1269}
1270func progUsesGlobal(p *obj.Prog) bool {
1271	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
1272		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
1273		// or R15 would be dead at them anyway.
1274		return false
1275	}
1276	if p.As == ALEAQ {
1277		// The GOT entry is placed directly in the destination register; R15 is not used.
1278		return false
1279	}
1280	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
1281}
1282
1283func errorCheck(ctxt *obj.Link, s *obj.LSym) {
1284	// When dynamic linking, R15 is used to access globals. Reject code that
1285	// uses R15 after a global variable access.
1286	if !ctxt.Flag_dynlink {
1287		return
1288	}
1289
1290	// Flood fill all the instructions where R15's value is junk.
1291	// If there are any uses of R15 in that set, report an error.
1292	var work []*obj.Prog
1293	var mentionsR15 bool
1294	for p := s.Func().Text; p != nil; p = p.Link {
1295		if progUsesGlobal(p) {
1296			work = append(work, p)
1297			p.Mark |= markBit
1298		}
1299		if progMentionsR15(p) {
1300			mentionsR15 = true
1301		}
1302	}
1303	if mentionsR15 {
1304		for len(work) > 0 {
1305			p := work[len(work)-1]
1306			work = work[:len(work)-1]
1307			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
1308				q.Mark |= markBit
1309				work = append(work, q)
1310			}
1311			if p.As == obj.AJMP || p.As == obj.ARET {
1312				continue // no fallthrough
1313			}
1314			if progMentionsR15(p) {
1315				if progOverwritesR15(p) {
1316					// R15 is overwritten by this instruction. Its value is not junk any more.
1317					continue
1318				}
1319				pos := ctxt.PosTable.Pos(p.Pos)
1320				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
1321				break // only report one error
1322			}
1323			if q := p.Link; q != nil && q.Mark&markBit == 0 {
1324				q.Mark |= markBit
1325				work = append(work, q)
1326			}
1327		}
1328	}
1329
1330	// Clean up.
1331	for p := s.Func().Text; p != nil; p = p.Link {
1332		p.Mark &^= markBit
1333	}
1334}
1335
// unaryDst is the opcode set installed as the UnaryDst field of both
// Linkamd64 and Link386.
// NOTE(review): judging by the name, these are instructions whose single
// operand is a destination — confirm against the documentation of
// obj.LinkArch.UnaryDst.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
1420
// Linkamd64 is the obj.LinkArch for the AMD64 target (sys.ArchAMD64).
// It uses the same instinit, preprocess, span6, progedit and unaryDst
// entries as Link386, and additionally installs errorCheck as its
// ErrorCheck hook.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
1431
// Link386 is the obj.LinkArch for the 386 target (sys.Arch386).
// Unlike Linkamd64 it does not set an ErrorCheck hook.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
1441