/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2004-2016. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#define ASM
#include "hipe_ppc_asm.h"
#include "hipe_literals.h"
#include "hipe_mode_switch.h"

	.text
	.p2align 2

#if defined(__powerpc64__)
/*
 * Enter Erlang from C.
 * Create a new frame on the C stack.
 * Save C callee-save registers (r14-r31) in the frame.
 * Save r0 (C return address) in the caller's LR save slot.
 * Retrieve the process pointer from the C argument registers.
 * Return to LR.
 * Do not clobber the C argument registers (r3-r10).
 *
 * Usage: mflr r0 SEMI bl .enter
 */
.enter:
	# Our PPC64 ELF ABI frame must include:
	# - 48 (6*8) bytes for AIX-like linkage area
	# - 64 (8*8) bytes for AIX-like parameter area for
	#   recursive C calls with up to 8 parameter words
	# - padding to make the frame a multiple of 16 bytes
	# - 144 (18*8) bytes for saving r14-r31
	# The final size is 256 bytes.
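	# (Arithmetic check: 48 + 64 + 144 = 256, which is already a
	#  multiple of 16, so no extra padding is actually needed, and the
	#  register save area starts at offset 48 + 64 = 112, matching the
	#  std offsets below.)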
	# stdu is required for atomic alloc+init
	stdu	r1,-256(r1)	/* 0(r1) contains r1+256 */
	std	r14, 112(r1)
	std	r15, 120(r1)
	std	r16, 128(r1)
	std	r17, 136(r1)
	std	r18, 144(r1)
	std	r19, 152(r1)
	std	r20, 160(r1)
	std	r21, 168(r1)
	std	r22, 176(r1)
	std	r23, 184(r1)
	std	r24, 192(r1)
	std	r25, 200(r1)
	std	r26, 208(r1)
	std	r27, 216(r1)
	std	r28, 224(r1)
	std	r29, 232(r1)
	std	r30, 240(r1)
	std	r31, 248(r1)
	std	r0,  256+16(r1)	/* caller saved LR in r0 */
	mr	P, r3		/* get the process pointer */
	blr

/*
 * Return to the calling C function.
 * The return value is in r3.
 *
 * .nosave_exit saves no state.
 * .flush_exit saves NSP and other cached P state.
 * .suspend_exit also saves RA.
 */
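/*
 * (For reference: within this file, .flush_exit is reached from
 * nbif_return and nbif_fail, .suspend_exit from nbif_callemu,
 * nbif_apply, the nbif_ccallemuN stubs and the suspend entries, and
 * .nosave_exit from the .handle_trap path, which stores NSP, NARITY
 * and NRA itself before exiting.)
 */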
.suspend_exit:
	/* save RA, so we can be resumed */
	mflr	r0
	std	r0, P_NRA(P)
.flush_exit:
	/* flush cached P state */
	SAVE_CACHED_STATE
.nosave_exit:
	/* restore callee-save registers, drop frame, return */
	ld	r0, 256+16(r1)
	mtlr	r0
	ld	r14, 112(r1)
	ld	r15, 120(r1)
	ld	r16, 128(r1)
	ld	r17, 136(r1)
	ld	r18, 144(r1)
	ld	r19, 152(r1)
	ld	r20, 160(r1)
	ld	r21, 168(r1)
	ld	r22, 176(r1)
	ld	r23, 184(r1)
	ld	r24, 192(r1)
	ld	r25, 200(r1)
	ld	r26, 208(r1)
	ld	r27, 216(r1)
	ld	r28, 224(r1)
	ld	r29, 232(r1)	/* kills HP */
	ld	r30, 240(r1)	/* kills NSP */
	ld	r31, 248(r1)	/* kills P */
	addi	r1, r1, 256
	blr
#else /* !__powerpc64__ */
/*
 * Enter Erlang from C.
 * Create a new frame on the C stack.
 * Save C callee-save registers (r14-r31) in the frame.
 * Save r0 (C return address) in the frame's LR save slot.
 * Retrieve the process pointer from the C argument registers.
 * Return to LR.
 * Do not clobber the C argument registers (r3-r10).
 *
 * Usage: mflr r0 SEMI bl .enter
 */
.enter:
	# A unified Linux/OSX C frame must include:
	# - 24 bytes for AIX/OSX-like linkage area
	# - 28 bytes for AIX/OSX-like parameter area for
	#   recursive C calls with up to 7 parameter words
	# - 76 bytes for saving r14-r31 and LR
	# - padding to make it a multiple of 16 bytes
	# The final size is 128 bytes.
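	# (Arithmetic check: 24 + 28 + 76 = 128, already a multiple of 16,
	#  so no extra padding is actually needed, and the register save
	#  area starts at offset 24 + 28 = 52, matching the stw offsets
	#  below.)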
	# stwu is required for atomic alloc+init
	stwu	r1,-128(r1)	/* 0(r1) contains r1+128 */
	stw	r14, 52(r1)
	stw	r15, 56(r1)
	stw	r16, 60(r1)
	stw	r17, 64(r1)
	stw	r18, 68(r1)
	stw	r19, 72(r1)
	stw	r20, 76(r1)
	stw	r21, 80(r1)
	stw	r22, 84(r1)
	stw	r23, 88(r1)
	stw	r24, 92(r1)
	stw	r25, 96(r1)
	stw	r26, 100(r1)
	stw	r27, 104(r1)
	stw	r28, 108(r1)
	stw	r29, 112(r1)
	stw	r30, 116(r1)
	stw	r31, 120(r1)
	stw	r0,  124(r1)	/* caller saved LR in r0 */
	mr	P, r3		/* get the process pointer */
	blr

/*
 * Return to the calling C function.
 * The return value is in r3.
 *
 * .nosave_exit saves no state.
 * .flush_exit saves NSP and other cached P state.
 * .suspend_exit also saves RA.
 */
.suspend_exit:
	/* save RA, so we can be resumed */
	mflr	r0
	stw	r0, P_NRA(P)
.flush_exit:
	/* flush cached P state */
	SAVE_CACHED_STATE
.nosave_exit:
	/* restore callee-save registers, drop frame, return */
	lwz	r0, 124(r1)
	mtlr	r0
	lwz	r14, 52(r1)
	lwz	r15, 56(r1)
	lwz	r16, 60(r1)
	lwz	r17, 64(r1)
	lwz	r18, 68(r1)
	lwz	r19, 72(r1)
	lwz	r20, 76(r1)
	lwz	r21, 80(r1)
	lwz	r22, 84(r1)
	lwz	r23, 88(r1)
	lwz	r24, 92(r1)
	lwz	r25, 96(r1)
	lwz	r26, 100(r1)
	lwz	r27, 104(r1)
	lwz	r28, 108(r1)
	lwz	r29, 112(r1)	/* kills HP */
	lwz	r30, 116(r1)	/* kills NSP */
	lwz	r31, 120(r1)	/* kills P */
	addi	r1, r1, 128
	blr
#endif /* !__powerpc64__ */

/*
 * int hipe_ppc_call_to_native(Process *p);
 * Emulated code recursively calls native code.
 */
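/*
 * (Note: the int value that the hipe_ppc_*_to_native entry points below
 * eventually return to C is the HIPE_MODE_SWITCH_RES_* code loaded into
 * r3 just before one of the exit paths above is taken, e.g. at
 * nbif_return.)
 */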
	OPD(hipe_ppc_call_to_native)
	GLOBAL(CSYM(hipe_ppc_call_to_native))
CSYM(hipe_ppc_call_to_native):
	/* save C context */
	mflr	r0
	bl	.enter
	/* prepare to call the target */
	LOAD	r0, P_NCALLEE(P)
	mtctr	r0
	/* get argument registers */
	LOAD_ARG_REGS
	/* cache some P state in registers */
	RESTORE_CACHED_STATE
	/* call the target */
	bctrl	/* defines LR (a.k.a. NRA) */
/* FALLTHROUGH
 *
 * We export this return address so that hipe_mode_switch() can discover
 * when native code tailcalls emulated code.
 *
 * This is where native code returns to emulated code.
 */
	GLOBAL(ASYM(nbif_return))
ASYM(nbif_return):
	STORE	r3, P_ARG0(P)			/* save retval */
	li	r3, HIPE_MODE_SWITCH_RES_RETURN
	b	.flush_exit

/*
 * int hipe_ppc_return_to_native(Process *p);
 * Emulated code returns to its native code caller.
 */
	OPD(hipe_ppc_return_to_native)
	GLOBAL(CSYM(hipe_ppc_return_to_native))
CSYM(hipe_ppc_return_to_native):
	/* save C context */
	mflr	r0
	bl	.enter
	/* restore return address */
	LOAD	r0, P_NRA(P)
	mtlr	r0
	/* cache some P state in registers */
	RESTORE_CACHED_STATE
	/* get return value */
	LOAD	r3, P_ARG0(P)
	/*
	 * Return using the current return address.
	 * The parameters were popped at the original native-to-emulated
	 * call (hipe_call_from_native_is_recursive), so a plain blr suffices.
	 */
	blr

/*
 * int hipe_ppc_tailcall_to_native(Process *p);
 * Emulated code tailcalls native code.
 */
	OPD(hipe_ppc_tailcall_to_native)
	GLOBAL(CSYM(hipe_ppc_tailcall_to_native))
CSYM(hipe_ppc_tailcall_to_native):
	/* save C context */
	mflr	r0
	bl	.enter
	/* prepare to call the target */
	LOAD	r0, P_NCALLEE(P)
	mtctr	r0
	/* get argument registers */
	LOAD_ARG_REGS
	/* restore return address */
	LOAD	r0, P_NRA(P)
	mtlr	r0
	/* cache some P state in registers */
	RESTORE_CACHED_STATE
	/* call the target */
	bctr

/*
 * int hipe_ppc_throw_to_native(Process *p);
 * Emulated code throws an exception to its native code caller.
 */
	OPD(hipe_ppc_throw_to_native)
	GLOBAL(CSYM(hipe_ppc_throw_to_native))
CSYM(hipe_ppc_throw_to_native):
	/* save C context */
	mflr	r0
	bl	.enter
	/* prepare to invoke handler */
	LOAD	r0, P_NCALLEE(P)	/* set by hipe_find_handler() */
	mtctr	r0
	/* cache some P state in registers */
	RESTORE_CACHED_STATE
	/* invoke the handler */
	bctr

/*
 * Native code calls emulated code via a stub
 * which should look as follows:
 *
 * stub for f/N:
 *	<set r12 to f's export entry address>
 *	<set r0 to N>
 *	b nbif_callemu
 *
 * The stub may need to create &nbif_callemu as a 32-bit immediate
 * in a scratch register if the branch needs a trampoline. The code
 * for creating a 32-bit immediate in r0 is potentially slower than
 * for other registers (an add must be replaced by an or, and adds
 * are potentially faster than ors), so it is better to use r0 for
 * the arity (a small immediate), making r11 available for trampolines.
 * (See "The PowerPC Compiler Writer's Guide", section 3.2.3.1.)
 *
 * XXX: Different stubs for different number of register parameters?
 */
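/*
 * As an illustration only (the real stubs are generated at load time,
 * and the exact instruction selection is not defined here), a stub for
 * f/3 whose export entry address fits in a 32-bit immediate could look
 * like:
 *
 *	lis	r12, f_export@ha
 *	addi	r12, r12, f_export@l
 *	li	r0, 3
 *	b	nbif_callemu
 *
 * where f_export is a placeholder name for f's export entry.
 */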
	GLOBAL(ASYM(nbif_callemu))
ASYM(nbif_callemu):
	STORE	r12, P_CALLEE_EXP(P)
	STORE	r0, P_ARITY(P)
	STORE_ARG_REGS
	li	r3, HIPE_MODE_SWITCH_RES_CALL_EXPORTED
	b	.suspend_exit

/*
 * nbif_apply
 */
	GLOBAL(ASYM(nbif_apply))
ASYM(nbif_apply):
	STORE_ARG_REGS
	li	r3, HIPE_MODE_SWITCH_RES_APPLY
	b	.suspend_exit

/*
 * Native code calls an emulated-mode closure via a stub defined below.
 *
 * The closure is appended as the last actual parameter, and parameters
 * beyond the first few passed in registers are pushed onto the stack in
 * left-to-right order.
 * Hence, the location of the closure parameter only depends on the number
 * of parameters in registers, not the total number of parameters.
 */
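/*
 * (Worked example, assuming NR_ARG_REGS == 4 purely for illustration:
 * a closure applied to 3 ordinary arguments enters at nbif_ccallemu3
 * with the arguments in ARG0-ARG2 and the closure in ARG3. Each stub
 * below stores one argument into the PCB and shifts the closure down
 * one register, so by nbif_ccallemu0 the closure is in r4 and is saved
 * to P_CLOSURE before suspending with RES_CALL_CLOSURE.)
 */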
#if NR_ARG_REGS >= 6
	GLOBAL(ASYM(nbif_ccallemu6))
ASYM(nbif_ccallemu6):
	STORE	ARG5, P_ARG5(P)
#if NR_ARG_REGS > 6
	mr	ARG5, ARG6
#else
	LOAD	ARG5, 0(NSP)
#endif
	/*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 5
	GLOBAL(ASYM(nbif_ccallemu5))
ASYM(nbif_ccallemu5):
	STORE	ARG4, P_ARG4(P)
#if NR_ARG_REGS > 5
	mr	ARG4, ARG5
#else
	LOAD	ARG4, 0(NSP)
#endif
	/*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 4
	GLOBAL(ASYM(nbif_ccallemu4))
ASYM(nbif_ccallemu4):
	STORE	ARG3, P_ARG3(P)
#if NR_ARG_REGS > 4
	mr	ARG3, ARG4
#else
	LOAD	ARG3, 0(NSP)
#endif
	/*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 3
	GLOBAL(ASYM(nbif_ccallemu3))
ASYM(nbif_ccallemu3):
	STORE	ARG2, P_ARG2(P)
#if NR_ARG_REGS > 3
	mr	ARG2, ARG3
#else
	LOAD	ARG2, 0(NSP)
#endif
	/*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 2
	GLOBAL(ASYM(nbif_ccallemu2))
ASYM(nbif_ccallemu2):
	STORE	ARG1, P_ARG1(P)
#if NR_ARG_REGS > 2
	mr	ARG1, ARG2
#else
	LOAD	ARG1, 0(NSP)
#endif
	/*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 1
	GLOBAL(ASYM(nbif_ccallemu1))
ASYM(nbif_ccallemu1):
	STORE	ARG0, P_ARG0(P)
#if NR_ARG_REGS > 1
	mr	ARG0, ARG1
#else
	LOAD	ARG0, 0(NSP)
#endif
	/*FALLTHROUGH*/
#endif

	GLOBAL(ASYM(nbif_ccallemu0))
ASYM(nbif_ccallemu0):
	/* We use r4 not ARG0 here because ARG0 is not
	   defined when NR_ARG_REGS == 0. */
#if NR_ARG_REGS == 0
	LOAD	r4, 0(NSP)		/* get the closure */
#endif
	STORE	r4, P_CLOSURE(P)	/* save the closure */
	li	r3, HIPE_MODE_SWITCH_RES_CALL_CLOSURE
	b	.suspend_exit

/*
 * This is where native code suspends.
 */
	GLOBAL(ASYM(nbif_suspend_0))
ASYM(nbif_suspend_0):
	li	r3, HIPE_MODE_SWITCH_RES_SUSPEND
	b	.suspend_exit

/*
 * Suspend from a receive (waiting for a message)
 */
	GLOBAL(ASYM(nbif_suspend_msg))
ASYM(nbif_suspend_msg):
	li	r3, HIPE_MODE_SWITCH_RES_WAIT
	b	.suspend_exit

/*
 * Suspend from a receive with a timeout (waiting for a message)
 *	if (!(p->flags & F_TIMO)) { suspend }
 *	else { return 0; }
 */
	GLOBAL(ASYM(nbif_suspend_msg_timeout))
ASYM(nbif_suspend_msg_timeout):
	LOAD	r4, P_FLAGS(P)
	li	r3, HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT
	/* this relies on F_TIMO (1<<2) fitting in a uimm16 */
	andi.	r0, r4, F_TIMO
	beq-	.suspend_exit			/* sees the CR state from andi. above */
	/* timeout has occurred */
	li	r3, 0
	blr

/*
 * This is the default exception handler for native code.
 */
	GLOBAL(ASYM(nbif_fail))
ASYM(nbif_fail):
	li	r3, HIPE_MODE_SWITCH_RES_THROW
	b	.flush_exit	/* no need to save RA */

	OPD(nbif_0_gc_after_bif)
	OPD(nbif_1_gc_after_bif)
	OPD(nbif_2_gc_after_bif)
	OPD(nbif_3_gc_after_bif)
	OPD(nbif_4_gc_after_bif)
	GLOBAL(CSYM(nbif_0_gc_after_bif))
	GLOBAL(CSYM(nbif_1_gc_after_bif))
	GLOBAL(CSYM(nbif_2_gc_after_bif))
	GLOBAL(CSYM(nbif_3_gc_after_bif))
	GLOBAL(CSYM(nbif_4_gc_after_bif))
CSYM(nbif_0_gc_after_bif):
	li	r4, 0
	b	.gc_after_bif
CSYM(nbif_1_gc_after_bif):
	li	r4, 1
	b	.gc_after_bif
CSYM(nbif_2_gc_after_bif):
	li	r4, 2
	b	.gc_after_bif
CSYM(nbif_3_gc_after_bif):
	li	r4, 3
	b	.gc_after_bif
CSYM(nbif_4_gc_after_bif):
	li	r4, 4
	/*FALLTHROUGH*/
.gc_after_bif:
	stw	r4, P_NARITY(P)		/* Note: narity is a 32-bit field */
	STORE	TEMP_LR, P_NRA(P)
	STORE	NSP, P_NSP(P)
	mflr	TEMP_LR
	li	r6, 0			/* Pass 0 in arity */
	li	r5, 0			/* Pass NULL in regs */
	mr	r4, r3
	mr	r3, P
	bl	CSYM(erts_gc_after_bif_call)
	mtlr	TEMP_LR
	LOAD	TEMP_LR, P_NRA(P)
	li	r4, 0
	stw	r4, P_NARITY(P)		/* Note: narity is a 32-bit field */
	blr

/*
 * We end up here when a BIF called from native signals an
 * exceptional condition.
 * The heap pointer was just read from P.
 * TEMP_LR contains a copy of LR.
 */
	OPD(nbif_0_simple_exception)
	GLOBAL(CSYM(nbif_0_simple_exception))
CSYM(nbif_0_simple_exception):
	li	r4, 0
	b	.nbif_simple_exception

	OPD(nbif_1_simple_exception)
	GLOBAL(CSYM(nbif_1_simple_exception))
CSYM(nbif_1_simple_exception):
	li	r4, 1
	b	.nbif_simple_exception

	OPD(nbif_2_simple_exception)
	GLOBAL(CSYM(nbif_2_simple_exception))
CSYM(nbif_2_simple_exception):
	li	r4, 2
	b	.nbif_simple_exception

	OPD(nbif_3_simple_exception)
	GLOBAL(CSYM(nbif_3_simple_exception))
CSYM(nbif_3_simple_exception):
	li	r4, 3
	b	.nbif_simple_exception

	OPD(nbif_4_simple_exception)
	GLOBAL(CSYM(nbif_4_simple_exception))
CSYM(nbif_4_simple_exception):
	li	r4, 4
	/*FALLTHROUGH*/
.nbif_simple_exception:
	LOAD	r3, P_FREASON(P)
	CMPI	r3, FREASON_TRAP
	beq-	.handle_trap
	/*
	 * Find and invoke catch handler (it must exist).
	 * The heap pointer was just read from P.
	 * TEMP_LR should contain the current call's return address.
	 * r4 should contain the current call's arity.
	 */
	STORE	NSP, P_NSP(P)
	STORE	TEMP_LR, P_NRA(P)
	stw	r4, P_NARITY(P)	/* Note: narity is a 32-bit field */
	/* find and prepare to invoke the handler */
	mr	r3, P
	bl	CSYM(hipe_handle_exception)	/* Note: hipe_handle_exception() conses */
	/* prepare to invoke handler */
	LOAD	r0, P_NCALLEE(P)	/* set by hipe_find_handler() */
	mtctr	r0
	RESTORE_CACHED_STATE		/* NSP updated by hipe_find_handler() */
	/* now invoke the handler */
	bctr

	/*
	 * A BIF failed with freason TRAP:
	 * - the BIF's arity is in r4
	 * - the native RA was saved in TEMP_LR before the BIF call
	 * - the native heap/stack/reds registers are saved in P
	 */
.handle_trap:
	li	r3, HIPE_MODE_SWITCH_RES_TRAP
	STORE	NSP, P_NSP(P)
	stw	r4, P_NARITY(P)	/* Note: narity is a 32-bit field */
	STORE	TEMP_LR, P_NRA(P)
	b	.nosave_exit

/*
 * nbif_stack_trap_ra: trap return address for maintaining
 * the gray/white stack boundary
 */
	GLOBAL(ASYM(nbif_stack_trap_ra))
ASYM(nbif_stack_trap_ra):		/* a return address, not a function */
	# This only handles a single return value.
	# If we have more, we need to save them in the PCB.
	mr	TEMP_ARG0, r3		/* save retval */
	STORE	NSP, P_NSP(P)
	mr	r3, P
	bl	CSYM(hipe_handle_stack_trap)	/* must not cons */
	mtctr	r3			/* original RA */
	mr	r3, TEMP_ARG0		/* restore retval */
	bctr				/* resume at original RA */

/*
 * hipe_ppc_inc_stack
 * Caller saved its LR in TEMP_LR (== TEMP1) before calling us.
 */
	GLOBAL(ASYM(hipe_ppc_inc_stack))
ASYM(hipe_ppc_inc_stack):
	STORE_ARG_REGS
	mflr	TEMP_ARG0
	STORE	NSP, P_NSP(P)
	mr	r3, P
	# hipe_inc_nstack reads and writes NSP and NSP_LIMIT,
	# but does not access LR/RA, HP, or FCALLS.
	bl	CSYM(hipe_inc_nstack)
	mtlr	TEMP_ARG0
	LOAD	NSP, P_NSP(P)
	LOAD_ARG_REGS
	blr

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif