1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "go_tls.h"
7#include "funcdata.h"
8#include "textflag.h"
9
// _rt0_386 is the shared program entry for most 386 systems when the
// binary is linked internally as an ordinary -buildmode=exe program.
// The kernel enters here with the argument count and the C-style argv
// array on the stack. Both are copied into this frame's two outgoing
// slots and control is transferred (JMP, not CALL) to runtime·rt0_go.
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// AX = argc (just above the 8-byte frame)
	MOVL	AX, 0(SP)	// slot 0, read by rt0_go
	LEAL	12(SP), BX	// BX = address of argv[0]
	MOVL	BX, 4(SP)	// slot 1, read by rt0_go
	JMP	runtime·rt0_go(SB)
20
// _rt0_386_lib is the shared startup path for most 386 systems built
// with -buildmode=c-archive or -buildmode=c-shared. The linker arranges
// for it to run as a global constructor (c-archive) or when the shared
// library is loaded (c-shared). argc and argv are expected on the stack,
// passed according to the usual C ABI.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	// C-style prologue: set up BP and save the registers that the
	// epilogue pops again.
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// Stash argc/argv for _rt0_386_lib_go, which runs later.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous part of initialization.
	CALL	runtime·libpreinit(SB)

	// Two argument words for whichever thread-creation call is used.
	SUBL	$8, SP

	// The rest of runtime initialization happens on a new thread.
	// Prefer the cgo thread creator when one is installed.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	spawn_nocgo

	// The C function wants an aligned stack. BP was loaded from SP in
	// the prologue but was clobbered by the libpreinit call, so it is
	// reused here to remember the unaligned SP.
	MOVL	SP, BP
	ANDL	$~15, SP

	MOVL	$0, 4(SP)			// second argument: 0
	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)			// first argument: thread entry point

	CALL	AX

	MOVL	BP, SP				// undo the alignment

	JMP	epilogue

spawn_nocgo:
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)                           // fn
	MOVL	$0x800000, 0(SP)                    // stacksize = 8192KB
	CALL	runtime·newosproc0(SB)

epilogue:
	ADDL	$8, SP
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET
77
// _rt0_386_lib_go brings up the Go runtime. _rt0_386_lib starts it on a
// separate thread; it replays the argc/argv values saved by
// _rt0_386_lib into the two slots that runtime·rt0_go reads.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	_rt0_386_lib_argv<>(SB), BX
	MOVL	AX, 0(SP)	// argc
	MOVL	BX, 4(SP)	// argv
	JMP	runtime·rt0_go(SB)
86
// argc and argv as received by _rt0_386_lib, kept for _rt0_386_lib_go.
// Each is a 4-byte, zero-initialized, file-local symbol marked NOPTR.
DATA _rt0_386_lib_argc<>(SB)/4, $0
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
91
// runtime·rt0_go is the common bootstrap. In order it: parks argc/argv
// on an aligned scratch stack, gives g0 provisional stack bounds, probes
// the CPU (CPUID, vendor, MMX), sets up TLS (through _cgo_init when one
// is present, otherwise ldt0setup), links m0 and g0, runs args/osinit/
// schedinit, queues runtime·main as a new goroutine and starts this M.
// Callers JMP here rather than CALL: argc is at 0(SP), argv at 4(SP).
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME,$0
	// Pick up the arguments, carve out an aligned scratch area and
	// park them at its top.
	MOVL	0(SP), AX
	MOVL	4(SP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// argc
	MOVL	BX, 124(SP)		// argv

	// Provisional stack bounds for g0.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	SP, (g_stack+stack_hi)(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)

	// Learn about the processor. CPUID is available only if the ID
	// bit (bit 21) of EFLAGS can be toggled.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP) // flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX	// AX = bits that actually changed
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE 	cpuid_ok

unsupported_cpu: // report that the program requires MMX, then exit(1).
	MOVL	$0x3d, 8(SP)			// length of the message (61)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$2, 0(SP)			// fd 2
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)

cpuid_ok:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	cpu_done

	// Decide how RDTSC gets serialized.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Nothing is known about the rest, so they get MFENCE too.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	vendor_done
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	vendor_done
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	vendor_done
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
vendor_done:

	// CPUID leaf 1: version information and feature flags.
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// MMX is mandatory.
	TESTL	$(1<<23), DX // MMX
	JZ	unsupported_cpu

cpu_done:
	// If there is a _cgo_init, call it so that it can initialize
	// and set up GS. Otherwise GS is set up here.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	setup_tls
#ifdef GOOS_android
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+8).
	MOVL	-8(TLS), BX
	MOVL	BX, 12(SP)
	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
#else
	MOVL	$0, BX
	MOVL	BX, 12(SP)	// arg 3,4: not used when using platform's TLS
	MOVL	BX, 8(SP)
#endif
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// arg 2: setg_gcc
	MOVL	BP, 0(SP)	// arg 1: g0
	CALL	AX

	// Recompute the stack guard from the stack.lo now stored in g0.
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP tls_ready
#endif
setup_tls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	tls_ready
#endif
#ifdef GOOS_darwin
	// skip runtime·ldt0setup(SB) on Darwin
	JMP	tls_ready
#endif

	// set up %gs
	CALL	ldt0setup<>(SB)

	// Verify it: a store through TLS must show up in m0.tls.
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	tls_ready
	MOVL	AX, 0	// abort: store to address 0
tls_ready:
	// Establish the m and g "registers" and cross-link m0 and g0.
	LEAL	runtime·g0(SB), DX
	LEAL	runtime·m0(SB), AX
	get_tls(BX)
	MOVL	DX, g(BX)	// g = g0
	MOVL	DX, m_g0(AX)	// m0->g0 = g0
	MOVL	AX, g_m(DX)	// g0->m = m0

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// Hand the saved argc, argv to runtime·args.
	MOVL	120(SP), AX
	MOVL	124(SP), BX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// Queue a new goroutine that starts the program.
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	// Reached only if mstart returns.
	CALL	runtime·abort(SB)
	RET
257
// Address of runtime·main; rt0_go pushes the address of this word as
// the entry argument of its runtime·newproc call.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// Diagnostic written to fd 2 on rt0_go's bad_proc path.
// 61 bytes, trailing newline included.
DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
GLOBL	bad_proc_msg<>(SB), RODATA, $61
263
// runtime·breakpoint executes INT 3 (the breakpoint trap) and then
// returns to its caller.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT $3
	RET
267
// runtime·asminit loads the x87 control word from runtime·controlWord64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision,
	// whereas other operating systems use double precision.
	// Switching to double precision makes them all agree, and also
	// matches other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET
275
276/*
277 *  go-routine
278 */
279
// void gosave(Gobuf*)
// Records the caller's resume state in the Gobuf (setjmp-like): its
// PC, its SP, the current g, and a zero ret. The Gobuf's ctxt field
// must already be zero; otherwise runtime·badctxt is called.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// AX = gobuf
	MOVL	0(SP), BX		// caller's PC (our return address)
	MOVL	BX, gobuf_pc(AX)
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	ctxt_ok
	CALL	runtime·badctxt(SB)
ctxt_ok:
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET
298
// void gogo(Gobuf*)
// Resumes the state stored in the Gobuf (longjmp-like): makes gobuf.g
// the current g, switches to gobuf.sp, loads gobuf.ret into AX and
// gobuf.ctxt into DX, clears those three fields, and jumps to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// BX = gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	// Each field is cleared once consumed, to help the garbage collector.
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	$0, gobuf_sp(BX)
	MOVL	gobuf_ret(BX), AX
	MOVL	$0, gobuf_ret(BX)
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX
315
// func mcall(fn func(*g))
// Switches to m->g0's stack and calls fn(g), where g is the goroutine
// that made the call. fn must never return; to keep g running it
// should eventually gogo(&g->sched).
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn

	// Record in g->sched where the caller would resume.
	get_tls(DX)
	MOVL	g(DX), AX	// AX = g
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Find m->g0. Calling mcall while already on g0 is an error.
	MOVL	g_m(AX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	switch_to_g0
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX

switch_to_g0:
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument for fn: the g that called mcall
	MOVL	DI, DX		// DX = fn, as the callee expects
	MOVL	0(DI), DI	// first word of fn: the code pointer
	CALL	DI
	// fn returned, which it must not do.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET
349
// systemstack_switch is a placeholder routine; systemstack records its
// address as the saved PC at the bottom of the G stack. A distinct
// routine is needed there because the one at the top of the system
// stack terminates the stack walk (see topofstack()), and the two must
// be distinguishable.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET
357
// func systemstack(fn func())
// Runs fn on the system stack. If the current g is already m->gsignal
// or m->g0, fn is tail-called in place. If it is m->curg, execution
// moves to g0's stack for the duration of the call and back afterwards.
// Any other g is reported through runtime·badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	// Running on gsignal: nothing to switch.
	CMPL	AX, m_gsignal(BX)
	JEQ	on_system_stack

	// Running on g0: nothing to switch.
	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	on_system_stack

	// The only remaining legal caller is m->curg.
	CMPL	AX, m_curg(BX)
	JNE	unknown_g

	// Switch stacks. First save our state in g->sched, pretending
	// to be systemstack_switch in case the G stack is scanned.
	MOVL	AX, (g_sched+gobuf_g)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)

	// Move to g0.
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// Plant mstart as a fake return address so that it looks as if
	// mstart called systemstack on g0, which stops the traceback.
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// Call fn: DX = fn, code pointer taken from its first word.
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// Return to m->curg and its saved stack pointer.
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

on_system_stack:
	// Already on a system stack, so tail call the function.
	// A tail call keeps tracebacks clean: they never stop at an
	// intermediate systemstack frame.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

unknown_g:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	// The indirect call hides this from the linker's nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3
421
422/*
423 * support for morestack
424 */
425
// runtime·morestack is called from a function prologue when more stack
// is needed. It records the state of the function f that called it and
// of f's caller, then runs runtime·newstack on m->g0's stack.
// DX on entry is stored as f's context.
//
// The traceback routines treat morestack on a g0 as the top of a stack
// (for example, morestack calling newstack calling the scheduler
// calling newm calling gc), so an argument size must be recorded; for
// that purpose it is declared with no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// The scheduler stack (m->g0) cannot grow.
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX	// BX = m
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	not_g0
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

not_g0:
	// The signal stack cannot grow either.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	not_gsignal
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

not_gsignal:
	// We were called from f.
	// m->morebuf describes f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// g->sched describes f itself.
	MOVL	SI, (g_sched+gobuf_g)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// Run newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET
478
// runtime·morestack_noctxt enters runtime·morestack with DX (the value
// morestack stores as the context) set to zero.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP runtime·morestack(SB)
482
// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// Frames cannot be variable-sized, so the call is routed through one of
// a small number of constant-sized-frame functions, which encodes a few
// bits of the size in the pc.
// Caution: ugly multiline assembly macros ahead!

// DISPATCH(TARGET, LIMIT): with the argument size in CX, jump to TARGET
// if CX <= LIMIT (unsigned compare); otherwise fall through to the code
// that follows the macro. AX is overwritten when the jump is taken.
#define DISPATCH(TARGET,LIMIT)		\
	CMPL	CX, $LIMIT;		\
	JA	3(PC);			\
	MOVL	$TARGET(SB), AX;		\
	JMP	AX
// Note: can't just "JMP TARGET(SB)" - bad inlining results.

TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX	// CX = argsize, tested by every DISPATCH
	// Frame sizes double from 16 bytes up to 1 GiB; the first one
	// that is large enough wins.
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// argsize exceeds every available frame.
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX
527
// CALLFN(SYM, FRAME) defines one fixed-frame call function named SYM
// with FRAME bytes of locals and 20 bytes of arguments (the reflectcall
// argument layout). It copies argsize bytes from argptr to the bottom of
// its frame, calls the code pointer stored at f with f in DX, and then
// passes argtype, argptr+retoffset, SP+retoffset and argsize-retoffset
// to callRet<> in DX, DI, SI and CX.
#define CALLFN(SYM,FRAME)			\
TEXT SYM(SB), WRAPPER, $FRAME-20;		\
	NO_LOCAL_POINTERS;			\
	/* stage the arguments in our frame */	\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* invoke the target */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX; 			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* load callRet's register arguments */	\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	argptr+8(FP), DI;		\
	ADDL	BX, DI;				\
	MOVL	SP, SI;				\
	ADDL	BX, SI;				\
	MOVL	argsize+12(FP), CX;		\
	SUBL	BX, CX;				\
	MOVL	argtype+0(FP), DX;		\
	CALL	callRet<>(SB);			\
	RET
552
// callRet copies the return values back at the end of call*. It exists
// as a separate function so that it can allocate the stack space for
// reflectcallmove's arguments. It does not follow the Go ABI: its
// inputs arrive in DX, DI, SI and CX and are stored, in that order, as
// the four argument words of runtime·reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	CX, 12(SP)
	MOVL	SI, 8(SP)
	MOVL	DI, 4(SP)
	MOVL	DX, 0(SP)
	CALL	runtime·reflectcallmove(SB)
	RET
564
// Instantiate one fixed-frame call function per size that reflectcall
// dispatches to. Each line must be a well-formed macro invocation,
// CALLFN(·callN, N); the "·" prefix places the symbol in the current
// package, giving the runtime·callN names reflectcall jumps to.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
592
// runtime·procyield executes PAUSE `cycles` times. The counter is
// decremented after each PAUSE and the loop ends when it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX	// AX = remaining iterations
spin:
	PAUSE
	SUBL	$1, AX
	JNZ	spin
	RET
600
// publicationBarrier emits no instructions besides RET.
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// x86 already keeps stores ordered, so all that is needed here
	// is a barrier for the compiler.
	RET
605
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. move the caller's return address back by 5 bytes (the length of
//    CALL plus a 32 bit displacement) so that it points at the CALL
//    again. When building for shared libraries the amount is 16 bytes:
//    5 for the CALL & displacement to __x86.get_pc_thunk.cx, 6 for the
//    LEAL that loads the offset into BX, and 5 for the call &
//    displacement itself.
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	argp+4(FP), BX	// caller sp
	MOVL	fv+0(FP), DX	// fn
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX	// code pointer stored at fn
	JMP	BX	// but first run the deferred function
625
// gosave<> saves the state of its caller into g->sched: the caller's
// SP and PC, with ret cleared. g->sched.ctxt must already be zero,
// otherwise runtime·badctxt is called. AX and BX are preserved.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX	// BX = g
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	LEAL	arg+0(FP), AX	// caller's SP
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX	// word just below it: our return address
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	ctxt_ok
	CALL	runtime·badctxt(SB)
ctxt_ok:
	POPL	BX
	POPL	AX
	RET
645
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Calls fn(arg) on the scheduler stack, aligned appropriately for the
// gcc ABI, and returns fn's result (AX).
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX		// DX = SP on entry

	// Decide whether a switch to the m->g0 stack is needed.
	// This is also called to create new OS threads, and those
	// arrive on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	no_g	// Don't even have a G yet.
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	on_sched_stack
	CMPL	DI, m_gsignal(BP)
	JEQ	on_sched_stack
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

on_sched_stack:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	CALL	AX

	// Restore g and rebuild the stack pointer from the saved depth.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
no_g:
	// No g at all: stay on the current stack, which is
	// a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	MOVL	DX, 4(SP)	// save original stack pointer
	CALL	AX

	MOVL	4(SP), CX	// restore original stack pointer
	MOVL	CX, SP
	MOVL	AX, ret+8(FP)
	RET
708
// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turns fn into a Go func value by taking the address of the argument
// slot that holds it, then calls cgocallback_gofunc with that address
// and the remaining three arguments unchanged.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	LEAL	fn+0(FP), AX	// &fn serves as the FuncVal*
	MOVL	AX, 0(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET
724
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// Runs runtime.cgocallbackg on m->curg's stack on behalf of a callback,
// borrowing an m first when the thread has no g.
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// A nil g means Go did not create the current thread; needm
	// supplies an m for temporary use. In that situation we are on
	// the thread stack with lots of space, which the linker cannot
	// know, so the call is hidden from its analysis by going
	// indirectly through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	borrow_m
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	have_m
borrow_m:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX	// DX is compared with 0 before dropm below
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that a panic during the function we
	// are about to execute has a valid SP to run on the g0 stack.
	// The code right after have_m saves this SP onto the stack and
	// then writes the same SP back to m->sched.sp. That looks
	// redundant, but if an unrecovered panic happens, unwindm
	// restores g->sched.sp from the stack location and systemstack
	// then tries to use it. Without the store here that restored SP
	// would be uninitialized (typically 0) and unusable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

have_m:
	// There is a valid m now, and we are running on its m->g0.
	// Save the current m->g0->sched.sp on the stack, then point it
	// at SP in preparation for the switch back from m->curg's stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg's stack and call runtime.cgocallbackg.
	// We take over the execution of m->curg without resuming what
	// had been running there, so m->curg->sched has to be saved for
	// later restoration. sched.sp is easy: calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// sched.pc is saved by pushing it onto the stack. As a bonus
	// this makes it look to the traceback routine as if
	// cgocallbackg will return to that PC (the frame allocated
	// below has the same size as cgocallback_gofunc's declared
	// frame), so the traceback continues seamlessly into the
	// earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	CX, 0(SP)
	MOVL	DX, 4(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Go back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so there is no need to restore that.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, needm above borrowed an m for the
	// duration of the call. The call is over, so return it with dropm.
	CMPL	DX, $0
	JNE keep_m
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

keep_m:
	// Done!
	RET
837
// void setg(G*); set g. for use by needm.
// Stores gg as the current g. On Windows it first points 0x14(FS) at
// gg's m.tls, or clears it and returns immediately when gg is nil.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	have_g
	MOVL	$0, 0x14(FS)
	RET
have_g:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX	// AX = &gg->m->tls
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET
854
// void setg_gcc(G*); set g. for use by gcc
// Stores its single stack argument as the current g.
// Uses AX and DX as scratch.
TEXT setg_gcc<>(SB), NOSPLIT, $0
	MOVL	gg+0(FP), DX
	get_tls(AX)
	MOVL	DX, g(AX)
	RET
861
// runtime·abort executes INT 3 and, should execution continue past the
// trap, spins forever. It never returns.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
spin:
	JMP	spin
866
// runtime·stackcheck verifies that SP lies within the current g's
// stack bounds, as the range [g->stack.lo, g->stack.hi), and calls
// runtime·abort if either comparison fails.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	// Upper bound.
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	below_hi
	CALL	runtime·abort(SB)
below_hi:
	// Lower bound.
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	above_lo
	CALL	runtime·abort(SB)
above_lo:
	RET
878
// func cputicks() int64
// Returns the time-stamp counter read by RDTSC: the low 32 bits (AX)
// go to ret_lo, the high 32 bits (DX) to ret_hi.
//
// When the CPU has SSE2, a fence is issued before RDTSC: LFENCE if
// runtime·lfenceBeforeRdtsc is 1, MFENCE otherwise. Without SSE2 no
// fence is issued.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	// The feature flag lives in package internal/cpu. In assembly
	// symbol names the path separator is the division slash "∕"
	// (U+2215) and the package/name separator the middle dot "·";
	// without the slash the symbol would name a package "internalcpu".
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	LFENCE
	JMP	done
mfence:
	MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET
894
// ldt0setup<> calls runtime·setldt with (7, &m0.tls, 32).
TEXT ldt0setup<>(SB),NOSPLIT,$16-0
	// Point ldt entry 7 at m0.tls.
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// The entry number is only a hint: setldt sets up GS with
	// whatever entry it actually used.
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	MOVL	$7, 0(SP)
	CALL	runtime·setldt(SB)
	RET
905
// runtime·emptyfunc does nothing but return. Unlike its neighbours it
// is declared with flags 0 rather than NOSPLIT.
// NOTE(review): presumably so that it gets the normal stack-check
// prologue - confirm against the toolchain documentation.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET
908
// runtime·memhash: hash function using AES hardware instructions.
// When runtime·useAeshash is zero it defers to runtime·memhashFallback.
// Otherwise it loads aeshashbody's register arguments from the data
// pointer p, the size s and the address of the result slot, and jumps.
TEXT runtime·memhash(SB),NOSPLIT,$0-16
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	LEAL	ret+12(FP), DX	// where the result goes
	MOVL	s+8(FP), BX	// size
	MOVL	p+0(FP), AX	// ptr to data
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·memhashFallback(SB)
919
// runtime·strhash hashes the string object that p points to.
// When runtime·useAeshash is zero it defers to runtime·strhashFallback.
// Otherwise the string's data pointer and length become aeshashbody's
// AX and BX, and DX is the address of the result slot.
TEXT runtime·strhash(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	LEAL	ret+8(FP), DX	// where the result goes
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·strhashFallback(SB)
930
// aeshashbody<> is the shared AES hash core; memhash and strhash reach
// it by JMP. Register interface:
//	AX: data
//	BX: length
//	DX: address to put return value
// The seed is read from h+4(FP), i.e. from the argument area of the
// function that jumped here. A 32-bit result is stored at (DX).
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
	AESENC	X0, X0                      // scramble seed

	// Choose a code path by length.
	CMPL	BX, $16
	JB	len0to15
	JE	len16
	CMPL	BX, $32
	JBE	len17to32
	CMPL	BX, $64
	JBE	len33to64
	JMP	len65plus

len0to15:
	TESTL	BX, BX
	JE	len0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	near_page_end

	// A 16-byte load at this address stays inside the page,
	// so the data can be loaded directly and the excess masked off.
	MOVOU	-16(AX), X1
	ADDL	BX, BX
	PAND	masks<>(SB)(BX*8), X1	// table entry at length*16

mix_one:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

near_page_end:
	// address ends in 1111xxxx. It might be up against a page
	// boundary, so load the 16 bytes that end at the last data byte
	// and shift the bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	mix_one

len0:
	// Empty input: the result is the scrambled seed, scrambled once more.
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

len16:
	MOVOU	(AX), X1
	JMP	mix_one

len17to32:
	// load data to be hashed: first and last 16 bytes (may overlap)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

len33to64:
	// make 3 more starting seeds, one per extra lane
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1
	PXOR	runtime·aeskeysched+32(SB), X2
	AESENC	X2, X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X3, X3

	// first 32 and last 32 bytes (may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// xor in the seeds
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// two further scrambles of every lane
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold the four lanes into one
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

len65plus:
	// make 3 more starting seeds, one per extra lane
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1
	PXOR	runtime·aeskeysched+32(SB), X2
	AESENC	X2, X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

block_loop:
	// For each lane: load 16 bytes of the block, scramble the state
	// with them, then scramble the state once more.
	MOVOU	(AX), X0
	AESENC	X0, X4
	AESENC	X4, X4
	MOVOU	16(AX), X1
	AESENC	X1, X5
	AESENC	X5, X5
	MOVOU	32(AX), X2
	AESENC	X2, X6
	AESENC	X6, X6
	MOVOU	48(AX), X3
	AESENC	X3, X7
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	block_loop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold the four lanes into one
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET
1114
// runtime·memhash32 hashes the 4 bytes at p with seed h.
// When runtime·useAeshash is zero it defers to
// runtime·memhash32Fallback. Otherwise seed and data are packed into X0
// and run through three AESENC rounds keyed from runtime·aeskeysched;
// the low 32 bits of X0 are the result.
TEXT runtime·memhash32(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed in dword 0
	PINSRD	$1, (AX), X0	// data in dword 1
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
fallback:
	JMP	runtime·memhash32Fallback(SB)
1128
// runtime·memhash64 hashes the 8 bytes at p with seed h.
// When runtime·useAeshash is zero it defers to
// runtime·memhash64Fallback. Otherwise data and seed are packed into X0
// and run through three AESENC rounds keyed from runtime·aeskeysched;
// the low 32 bits of X0 are the result.
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data in the low 8 bytes
	PINSRD	$2, h+4(FP), X0	// seed in dword 2
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
fallback:
	JMP	runtime·memhash64Fallback(SB)
1142
// masks<> is a table of sixteen 16-byte masks used to discard data in
// the high part of a register: entry n (at byte offset n*16) has its
// low n bytes set to 0xff and the remaining bytes zero. Each entry is
// written as two little-endian 8-byte words, low half first.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000

DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000

DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000

DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000

DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000

DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000

DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000

DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000

DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000

DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff

DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff

DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff

DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff

DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff

DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff

DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff

GLOBL masks<>(SB),RODATA,$256
1225
// shifts<> is a table of sixteen 16-byte control vectors for pshufb. They
// move data down from the high bytes of the register to the low bytes of
// the register. Entry n (at byte offset n*16) moves n bytes: its first n
// control bytes select source bytes 16-n through 15, and the remaining
// control bytes are 0xff. Entry 0 is all zero.
// Each entry is spelled as two little-endian 64-bit words: low half, then
// high half.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000

DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff

DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f

DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e

DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d

DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c

DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b

DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a

DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09

GLOBL shifts<>(SB),RODATA,$256
1310
// checkASM reports, as a one-byte boolean result, whether both the
// masks<> and shifts<> tables start on a 16-byte boundary.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// OR the two addresses together: a low bit set in either one
	// survives into the combined value, so a single test covers both.
	MOVL	$shifts<>(SB), AX
	MOVL	$masks<>(SB), BX
	ORL	BX, AX
	TESTL	$15, AX
	// Result is 1 when the low four bits are all clear, 0 otherwise.
	SETEQ	ret+0(FP)
	RET
1319
// return0 sets AX to 0 and returns. It takes no arguments and uses no
// stack frame.
// NOTE(review): presumably a stub whose only purpose is to hand a zero
// result in AX back to its caller — confirm against the call sites.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX	// AX = 0; a plain move, so FLAGS are left as they were
	RET
1323
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is left in AX; CX is the only other register written.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)	// CX = thread-local storage base
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), AX	// AX = g->m
	MOVL	m_curg(AX), AX	// AX = g->m->curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg->stack.hi
	RET
1333
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The body is a one-byte NOP, a call to goexit1, and another one-byte NOP.
// With the leading NOP in place, goexit+1 is the address of the CALL
// (assuming PCQuantum is 1 on this platform — TODO confirm). The trailing
// NOP keeps the CALL's return address inside goexit's own code range.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
1341
// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
//
// The new moduledata is appended after the current tail
// (runtime·lastmoduledatap) and then becomes the tail itself.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, moduledata_next(DX)	// tail.next = new moduledata
	MOVL	AX, runtime·lastmoduledatap(SB)	// new moduledata is now the tail
	RET
1352
// uint32tofloat64 converts the 32-bit argument a (at 0(FP)) to a float64
// result (at 4(FP)). The value is widened to 64 bits in the 8-byte local
// frame with a zero high word, then loaded as a 64-bit integer by the x87
// unit, so it is always treated as non-negative.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	// Build the 64-bit integer at 0(SP): high word zero, low word a.
	MOVL	$0, 4(SP)
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)
	// Load it as a 64-bit integer, then store it as a float64 and pop.
	FMOVV	0(SP), F0
	FMOVDP	F0, ret+4(FP)
	RET
1360
// float64touint32 converts the float64 argument a (at 0(FP)) to a 32-bit
// result (at 8(FP)).
// Local frame layout: 0(SP) holds the saved x87 control word, and the
// 8 bytes at 4(SP) receive the 64-bit integer produced by the conversion.
// The conversion runs under runtime·controlWord64trunc, and the caller's
// control word is restored before returning.
// NOTE(review): the name suggests 64-bit precision with truncating
// rounding; the value is defined elsewhere — confirm.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0	// push a onto the x87 stack
	FSTCW	0(SP)	// save the current control word
	FLDCW	runtime·controlWord64trunc(SB)	// switch control word for the conversion
	FMOVVP	F0, 4(SP)	// store as a 64-bit integer and pop
	FLDCW	0(SP)	// restore the saved control word
	MOVL	4(SP), AX	// keep only the low 32 bits of the integer
	MOVL	AX, ret+8(FP)
	RET
1370
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
//
// Local frame layout (28 bytes):
//	 0(SP)	DI (slow path only; also first argument to wbBufFlush)
//	 4(SP)	AX (slow path only; also second argument to wbBufFlush)
//	 8(SP)	DX (slow path only)
//	12(SP)	BP (slow path only)
//	16(SP)	SI (slow path only)
//	20(SP)	CX (always)
//	24(SP)	BX (always)
//
// Fast path: reserve one 8-byte record in the current P's write barrier
// buffer and fill it with the new value and the old contents of the slot.
// If that reservation moved wbBuf.next to wbBuf.end, call wbBufFlush.
// Either way the function finishes by performing the write itself.
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVL	CX, 20(SP)
	MOVL	BX, 24(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(BX)
	MOVL	g(BX), BX	// BX = g
	MOVL	g_m(BX), BX	// BX = g.m
	MOVL	m_p(BX), BX	// BX = g.m.p
	MOVL	(p_wbBuf+wbBuf_next)(BX), CX	// CX = p.wbBuf.next
	// Increment wbBuf.next position.
	LEAL	8(CX), CX	// LEAL does not touch FLAGS
	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)	// flags consumed by JEQ below; the MOVLs in between leave them intact
	// Record the write.
	MOVL	AX, -8(CX)	// Record value
	MOVL	(DI), BX	// TODO: This turns bad writes into bad reads.
	MOVL	BX, -4(CX)	// Record *slot
	// Is the buffer full? (flags set in CMPL above)
	JEQ	flush
ret:
	// Restore the two registers saved on entry.
	MOVL	20(SP), CX
	MOVL	24(SP), BX
	// Do the write.
	MOVL	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	MOVL	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVL	AX, 4(SP)	// Also second argument to wbBufFlush
	// BX already saved
	// CX already saved
	MOVL	DX, 8(SP)
	MOVL	BP, 12(SP)
	MOVL	SI, 16(SP)
	// DI already saved

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	// Reload everything spilled above, then rejoin the common exit.
	MOVL	0(SP), DI
	MOVL	4(SP), AX
	MOVL	8(SP), DX
	MOVL	12(SP), BP
	MOVL	16(SP), SI
	JMP	ret
1428
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame. Each stub writes its two register arguments into that
// stack space (x at 0(FP), y at 4(FP)) and then tail calls the corresponding runtime
// handler. The tail call makes these stubs disappear in backtraces.
//
// PANIC_STUB(NAME, HANDLER, XREG, YREG) defines stub NAME, which stores XREG as x
// and YREG as y and then jumps to HANDLER.
#define PANIC_STUB(NAME, HANDLER, XREG, YREG)	\
TEXT NAME(SB),NOSPLIT,$0-8;			\
	MOVL	XREG, x+0(FP);			\
	MOVL	YREG, y+4(FP);			\
	JMP	HANDLER(SB)

PANIC_STUB(runtime·panicIndex, runtime·goPanicIndex, AX, CX)
PANIC_STUB(runtime·panicIndexU, runtime·goPanicIndexU, AX, CX)
PANIC_STUB(runtime·panicSliceAlen, runtime·goPanicSliceAlen, CX, DX)
PANIC_STUB(runtime·panicSliceAlenU, runtime·goPanicSliceAlenU, CX, DX)
PANIC_STUB(runtime·panicSliceAcap, runtime·goPanicSliceAcap, CX, DX)
PANIC_STUB(runtime·panicSliceAcapU, runtime·goPanicSliceAcapU, CX, DX)
PANIC_STUB(runtime·panicSliceB, runtime·goPanicSliceB, AX, CX)
PANIC_STUB(runtime·panicSliceBU, runtime·goPanicSliceBU, AX, CX)
PANIC_STUB(runtime·panicSlice3Alen, runtime·goPanicSlice3Alen, DX, BX)
PANIC_STUB(runtime·panicSlice3AlenU, runtime·goPanicSlice3AlenU, DX, BX)
PANIC_STUB(runtime·panicSlice3Acap, runtime·goPanicSlice3Acap, DX, BX)
PANIC_STUB(runtime·panicSlice3AcapU, runtime·goPanicSlice3AcapU, DX, BX)
PANIC_STUB(runtime·panicSlice3B, runtime·goPanicSlice3B, CX, DX)
PANIC_STUB(runtime·panicSlice3BU, runtime·goPanicSlice3BU, CX, DX)
PANIC_STUB(runtime·panicSlice3C, runtime·goPanicSlice3C, AX, CX)
PANIC_STUB(runtime·panicSlice3CU, runtime·goPanicSlice3CU, AX, CX)
1498
// Extended versions for 64-bit indexes.
// The index arrives split across two registers: the high word is always in SI
// and is stored as hi at 0(FP); the low word is stored as lo at 4(FP); the
// second operand is stored as y at 8(FP). Each stub then tail calls its
// runtime handler.
//
// PANIC_EXTEND_STUB(NAME, HANDLER, LOREG, YREG) defines stub NAME, which stores
// SI as hi, LOREG as lo and YREG as y and then jumps to HANDLER.
#define PANIC_EXTEND_STUB(NAME, HANDLER, LOREG, YREG)	\
TEXT NAME(SB),NOSPLIT,$0-12;				\
	MOVL	SI, hi+0(FP);				\
	MOVL	LOREG, lo+4(FP);			\
	MOVL	YREG, y+8(FP);				\
	JMP	HANDLER(SB)

PANIC_EXTEND_STUB(runtime·panicExtendIndex, runtime·goPanicExtendIndex, AX, CX)
PANIC_EXTEND_STUB(runtime·panicExtendIndexU, runtime·goPanicExtendIndexU, AX, CX)
PANIC_EXTEND_STUB(runtime·panicExtendSliceAlen, runtime·goPanicExtendSliceAlen, CX, DX)
PANIC_EXTEND_STUB(runtime·panicExtendSliceAlenU, runtime·goPanicExtendSliceAlenU, CX, DX)
PANIC_EXTEND_STUB(runtime·panicExtendSliceAcap, runtime·goPanicExtendSliceAcap, CX, DX)
PANIC_EXTEND_STUB(runtime·panicExtendSliceAcapU, runtime·goPanicExtendSliceAcapU, CX, DX)
PANIC_EXTEND_STUB(runtime·panicExtendSliceB, runtime·goPanicExtendSliceB, AX, CX)
PANIC_EXTEND_STUB(runtime·panicExtendSliceBU, runtime·goPanicExtendSliceBU, AX, CX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3Alen, runtime·goPanicExtendSlice3Alen, DX, BX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3AlenU, runtime·goPanicExtendSlice3AlenU, DX, BX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3Acap, runtime·goPanicExtendSlice3Acap, DX, BX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3AcapU, runtime·goPanicExtendSlice3AcapU, DX, BX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3B, runtime·goPanicExtendSlice3B, CX, DX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3BU, runtime·goPanicExtendSlice3BU, CX, DX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3C, runtime·goPanicExtendSlice3C, AX, CX)
PANIC_EXTEND_STUB(runtime·panicExtendSlice3CU, runtime·goPanicExtendSlice3CU, AX, CX)
1580
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// runtime·tls_g is a 4-byte, pointer-free global initialized to 8.
// NOTE(review): 8 is presumably the byte offset of slot #2 with 4-byte
// slots (2*4) — confirm against the Android TLS layout.
DATA runtime·tls_g+0(SB)/4, $8
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif
1587