/*      $NetBSD: rtld_start.S,v 1.4 2001/09/26 04:06:43 mycroft Exp $   */

/*-
 * Copyright (C) 1998   Tsubai Masanari
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <machine/asm.h>
#include <machine/spr.h>	/* For SPR_SPEFSCR if needed. */

/*
 * Symbols referenced by .rtld_start while it computes its own load
 * address (see the PC-relative addis/addi pairs there).
 */
.extern _GLOBAL_OFFSET_TABLE_
.extern _DYNAMIC

/*
 * .rtld_start
 *
 * Entry point of the run-time linker.  The code below spills %r3, %r4,
 * %r5 and %r8 as argc, argv, envp and ps_strings; %r6 (obj) and %r7
 * (cleanup) are not spilled because they are always 0 on entry.
 *
 * Sequence:
 *  1. Self-relocate via reloc_non_plt_self(&_DYNAMIC, relocbase).
 *  2. Call _rtld(&argv[-1], &exit_proc, &obj_main); its return value is
 *     the address that is subsequently called as _start.
 *  3. Reload the arguments, pop the frame and call that address with
 *     (argc, argv, envp, obj, cleanup, ps_strings).
 *  4. If that call returns, issue system call number 1 (_exit).
 *
 * Frame layout (48 bytes, offsets from %r1 after the stwu):
 *	 0	back chain (written by stwu)
 *	 4	lrsave slot (not written by this routine)
 *	 8	exit_proc, output slot handed to _rtld
 *	12	obj_main, output slot handed to _rtld
 *	16	argc
 *	20	argv
 *	24	envp
 *	28	obj (store is commented out)
 *	32	cleanup (store is commented out)
 *	36	ps_strings
 */
_ENTRY(.rtld_start)
	stwu	%r1,-48(%r1)	/* 16-byte aligned stack for reg saves +
				exit_proc & obj _rtld args +
				backchain & lrsave stack frame */
	stw	%r3,16(%r1)	/*  argc */
	stw	%r4,20(%r1)	/*  argv */
	stw	%r5,24(%r1)	/*  envp */
/*	stw	%r6,28(%r1)   *//*  obj (always 0) */
/*	stw	%r7,32(%r1)   *//*  cleanup (always 0) */
	stw	%r8,36(%r1)	/*  ps_strings */

	/*
	 * Perform initial relocation of ld-elf.so. Not as easy as it
	 * sounds.
	 *  - perform small forward branch to put PC into link reg
	 *  - use link-time constants to determine offset to the
	 *    _DYNAMIC section and the GOT. Add these to the PC to
	 *    convert to absolute addresses.
	 *  - read GOT[0], which is the SVR4 ABI-specified link-time
	 *    value of _DYNAMIC. Subtract this value from the absolute
	 *    value to determine the load address
	 *  - call reloc_non_plt_self() to fix up ld-elf.so's relocations
	 */
	bcl	20,31,1f		/* LR = run-time address of 1: */
1:	mflr	%r30
	mr	%r3,%r30		# save for _DYNAMIC
	addis	%r30,%r30,_GLOBAL_OFFSET_TABLE_-1b@ha
	addi	%r30,%r30,_GLOBAL_OFFSET_TABLE_-1b@l
	addis	%r3,%r3,_DYNAMIC-1b@ha	# get _DYNAMIC actual address
	addi	%r3,%r3,_DYNAMIC-1b@l
	lwz	%r28,0(%r30)		# get base-relative &_DYNAMIC
	sub	%r28,%r3,%r28		# r28 = relocbase
	mr	%r4,%r28		# r4 = relocbase
	bl	reloc_non_plt_self /* reloc_non_plt_self(&_DYNAMIC,base) */

	/*
	 * The _rtld() function likes to see a stack layout containing
	 * { argc, argv[0], argv[1] ... argv[N], 0, env[0], ... , env[N] }
	 * Since the PowerPC stack was 16-byte aligned at exec time, the
	 * original stack layout has to be found by moving back a word
	 * from the argv pointer.
	 */
	lwz	%r4,20(%r1)	/* restore argv */
	addi	%r3,%r4,-4	/* locate argc ptr, &argv[-1] */

	addi	%r4,%r1,8	/* &exit_proc on stack */
	addi	%r5,%r1,12	/* &obj_main on stack */

	bl	_rtld		/* &_start = _rtld(sp, &exit_proc, &obj_main)*/
	mtlr	%r3		/* keep &_start in LR for the blrl below */

	/*
	 * Restore args, with new obj/exit proc
	 */
	lwz	%r3,16(%r1)	/* argc */
	lwz	%r4,20(%r1)	/* argv */
	lwz	%r5,24(%r1)	/* envp */
	lwz	%r6,12(%r1)	/* obj */
	lwz	%r7,8(%r1)	/* exit proc */
	lwz	%r8,36(%r1)	/* ps_strings */
	addi	%r1,%r1,48	/* restore original stackptr */

	blrl	/* _start(argc, argv, envp, obj, cleanup, ps_strings) */

	/* Only reached if _start returns; %r3 is left as _start set it. */
	li	%r0,1		/* _exit() */
	sc

/*
 * Frame geometry used by _rtld_bind_secureplt_start and _rtld_bind_start.
 *
 *	NREGS		number of register save slots
 *	GPRWIDTH	bytes per save slot
 *	FUDGE		padding added to the frame size
 *
 * With the values below STACK_SIZE evaluates to 272 bytes when __SPE__
 * is defined and 144 bytes otherwise; both are multiples of 16.
 */
#ifdef __SPE__
/* stack space for 30 GPRs + SPEFSCR/ACC/lr/cr */
#define	NREGS		31
#define	GPRWIDTH	8
#define	FUDGE		4	/* Fudge factor for alignment */
#else
/* stack space for 30 GPRs + lr/cr */
#define	NREGS		30
#define	GPRWIDTH	4
#define	FUDGE		4
#endif
/* Stack frame needs the 12-byte ABI frame plus fudge factor. */
#define	STACK_SIZE	(NREGS * GPRWIDTH + 4 * 2 + 12 + FUDGE)

/*
 * _rtld_bind_secureplt_start()
 *
 * Call into the MI binder (Secure-PLT stub).
 * secure-plt expects %r11 to be the offset to the rela entry.
 * bss-plt expects %r11 to be index of the rela entry.
 *
 * This stub allocates the same frame and saves %r0 in the same slot as
 * _rtld_bind_start, converts the offset in %r11 into an index
 * (offset / 12) and then branches to local label 1 inside
 * _rtld_bind_start, where the index is multiplied by 12 again before
 * _rtld_bind is called.
 *
 * In:		%r11 = offset of the rela entry
 * Out:		%r11 = offset / 12; falls into the shared save/bind path
 * Scratch:	%r0 (its entry value has already been saved in the frame)
 */
_ENTRY(_rtld_bind_secureplt_start)
	stwu	%r1,-STACK_SIZE(%r1)
#ifdef __SPE__
	evstdd	%r0,24(%r1)
#else
	stw	%r0,20(%r1)		# save r0
#endif

	/*
	 * Instead of division which is costly we will use multiplicative
	 * inverse.  a / n = ((a * inv(n)) >> 32)
	 * where inv(n) = (0x100000000 + n - 1) / n
	 * For n = 12 this gives 0x15555556.
	 */
	mr	%r0,%r11
	lis	%r11,0x15555556@h	# load multiplicative inverse of 12
	ori	%r11,%r11,0x15555556@l
	mulhwu	%r11,%r11,%r0		# get high half of multiplication
	b	1f			/* join _rtld_bind_start after its prologue */

/*
 * _rtld_bind_start()
 *
 * Call into the MI binder. This routine is reached via the PLT call cell,
 * and then _rtld_powerpc_pltresolve().
 * On entry, %r11 contains the index of the PLT cell, and %r12 contains
 * a pointer to the ELF object for the file.
 *  Save all registers, call into the binder to resolve and fixup the external
 * routine, and then transfer to the external routine on return.
 *
 * Local label 1 below is also the join point for
 * _rtld_bind_secureplt_start, which performs the same frame allocation
 * and %r0 save before branching there.
 *
 * Save area (offsets from %r1 after the stwu):
 *
 *   without __SPE__			with __SPE__
 *	 12	CR			 12	CR
 *	 16	LR			 16	LR
 *	 20	%r0			 20	SPEFSCR
 *	 24..	%r3-%r31 (stmw)		 24	%r0 (evstdd)
 *					 32..248 %r3-%r30, 8 bytes each
 *					256	%r31
 *					264	value produced by evmwumiaa
 *
 * The resolved target address is carried in CTR across the register
 * restore and reached with bctr, so LR, CR and every GPR saved above hold
 * their entry values when the target starts executing.
 */
	.globl	_rtld_bind

_ENTRY(_rtld_bind_start)
	stwu	%r1,-STACK_SIZE(%r1)
#ifdef __SPE__
	evstdd	%r0,24(%r1)
#else
	stw	%r0,20(%r1)		# save r0
#endif
1:
	mflr	%r0
	stw	%r0,16(%r1)		# save lr
	mfcr	%r0
	stw	%r0,12(%r1)		# save cr
#ifdef __SPE__
	evstdd	%r3, 32(%r1)
	evstdd	%r4, 40(%r1)
	evstdd	%r5, 48(%r1)
	evstdd	%r6, 56(%r1)
	evstdd	%r7, 64(%r1)
	evstdd	%r8, 72(%r1)
	evstdd	%r9, 80(%r1)
	evstdd	%r10, 88(%r1)
	evstdd	%r11, 96(%r1)
	evstdd	%r12, 104(%r1)
	evstdd	%r13, 112(%r1)
	evstdd	%r14, 120(%r1)
	evstdd	%r15, 128(%r1)
	evstdd	%r16, 136(%r1)
	evstdd	%r17, 144(%r1)
	evstdd	%r18, 152(%r1)
	evstdd	%r19, 160(%r1)
	evstdd	%r20, 168(%r1)
	evstdd	%r21, 176(%r1)
	evstdd	%r22, 184(%r1)
	evstdd	%r23, 192(%r1)
	evstdd	%r24, 200(%r1)
	evstdd	%r25, 208(%r1)
	evstdd	%r26, 216(%r1)
	evstdd	%r27, 224(%r1)
	evstdd	%r28, 232(%r1)
	evstdd	%r29, 240(%r1)
	evstdd	%r30, 248(%r1)
	/*
	 * Offsets 256 and 264 are reached through the indexed form with the
	 * offset in a register (presumably because they exceed the evstdd
	 * displacement range -- confirm against the SPE manual).  %r3 and
	 * %r0 are free to use as scratch: both were saved above.
	 */
	li	%r3, 256
	evstddx	%r31, %r1, %r3
	/*
	 * NOTE(review): with %r0 zeroed, evmwumiaa appears to be used to
	 * read the SPE accumulator (0 * 0 + ACC) so it can be saved at 264;
	 * the matching evmra in the restore path puts it back.  Confirm.
	 */
	evxor	%r0, %r0, %r0
	li	%r3, 264
	evmwumiaa	%r0, %r0, %r0
	evstddx	%r0, %r1, %r3
	mfspr	%r3, SPR_SPEFSCR
	stw	%r3, 20(%r1)
#else
	stmw	%r3,24(%r1)		# save r3-r31
#endif

	mr	%r3,%r12		# obj
	mulli	%r4,%r11,12		# rela index * sizeof(Elf_Rela)
	bl	_rtld_bind		# target addr = _rtld_bind(obj, reloff)
	mtctr	%r3			# move absolute target addr into ctr

#ifdef __SPE__
	lwz	%r3, 20(%r1)
	mtspr	SPR_SPEFSCR, %r3
	li	%r3, 264
	evlddx	%r0, %r3, %r1
	evmra	%r0, %r0
	evldd	%r3, 32(%r1)
	evldd	%r4, 40(%r1)
	evldd	%r5, 48(%r1)
	evldd	%r6, 56(%r1)
	evldd	%r7, 64(%r1)
	evldd	%r8, 72(%r1)
	evldd	%r9, 80(%r1)
	evldd	%r10, 88(%r1)
	evldd	%r11, 96(%r1)
	evldd	%r12, 104(%r1)
	evldd	%r13, 112(%r1)
	evldd	%r14, 120(%r1)
	evldd	%r15, 128(%r1)
	evldd	%r16, 136(%r1)
	evldd	%r17, 144(%r1)
	evldd	%r18, 152(%r1)
	evldd	%r19, 160(%r1)
	evldd	%r20, 168(%r1)
	evldd	%r21, 176(%r1)
	evldd	%r22, 184(%r1)
	evldd	%r23, 192(%r1)
	evldd	%r24, 200(%r1)
	evldd	%r25, 208(%r1)
	evldd	%r26, 216(%r1)
	evldd	%r27, 224(%r1)
	evldd	%r28, 232(%r1)
	evldd	%r29, 240(%r1)
	evldd	%r30, 248(%r1)
	li	%r0, 256		/* %r0 is scratch until reloaded below */
	evlddx	%r31, %r1, %r0
#else
	lmw	%r3,24(%r1)		# restore r3-r31
#endif
	lwz	%r0,12(%r1)		# restore cr
	mtcr	%r0
	lwz	%r0,16(%r1)		# restore lr
	mtlr	%r0
#ifdef __SPE__
	evldd	%r0,24(%r1)
#else
	lwz	%r0,20(%r1)		# restore r0
#endif

	addi	%r1,%r1,STACK_SIZE	# restore stack
	bctr				# jump to target


/*
 * _rtld_powerpc_pltlongresolve() / _rtld_powerpc_pltresolve()
 *
 *  This code is copied into the latter part of the 72-byte reserved
 * area at the start of the PLT. The absolute address of the _rtld_bind_start
 * routine, and the ELF object for the loaded file, are inserted into
 * the code by the reloc.c:init_pltgot() routine; the zero immediates
 * below are the placeholders, and the trailing comment on each such
 * line shows the intended operand.
 *  The first time an external routine is called, the PLT slot will
 * set up %r11 and jump here.
 *
 * _rtld_powerpc_pltlongresolve:
 *	subtracts the (patched-in) jmptab address from %r11 and shifts the
 *	result right by 2 to obtain the index, then falls through into
 *	_rtld_powerpc_pltresolve.
 * _rtld_powerpc_pltresolve:
 *	loads the address of _rtld_bind_start into CTR and the ELF object
 *	into %r12, then jumps to _rtld_bind_start to complete the binding.
 *	%r11 is passed through unchanged.
 *
 * NOTE(review): because this code is copied and patched elsewhere, the
 * instruction sequence presumably must not change without a matching
 * update to reloc.c:init_pltgot() -- confirm before editing.
 */
_ENTRY(_rtld_powerpc_pltlongresolve)
	lis	%r12,0			# lis	12,jmptab@ha
	addi	%r12,%r12,0		# addi  12,12,jmptab@l
	subf	%r11,%r12,%r11		# reloff
	li	%r12,2
	srw	%r11,%r11,%r12		# index = reloff/sizeof(Elf_Addr)
_ENTRY(_rtld_powerpc_pltresolve)
	lis	%r12,0			# lis   12,_rtld_bind_start@ha
	addi	%r12,%r12,0		# addi  12,12,_rtld_bind_start@l
	mtctr	%r12
	lis	%r12,0			# lis   12,obj@ha
	addi	%r12,%r12,0		# addi  12,12,obj@l
	bctr

/*
 * _rtld_powerpc_pltcall()
 *
 *  This routine is copied into the 72-byte reserved area at the
 * start of the PLT. The reloc.c:init_pltgot() routine inserts
 * the absolute address of the jumptable (the zero operands of the
 * addis and lwz below are the placeholders).
 *  Control is transferred to this routine when the binder has
 * located the external routine, but determined that it is > 32Mb
 * from the PLT slot. Code is inserted into the PLT slot to set up
 * %r11 with the jumptable index, and jump to here, where the
 * absolute address of the external routine is loaded from the
 * jumptable and control is transferred to it.
 *
 * In:		%r11 = jumptable index
 * Clobbers:	%r11, CTR
 */
_ENTRY(_rtld_powerpc_pltcall)
	slwi	%r11,%r11,2		# jmptab offset = index * 4
	addis	%r11,%r11,0		# addis 11,11,jmptab@ha
	lwz	%r11,0(%r11)		# lwz   11,jmptab@l(11)
	mtctr	%r11
	bctr				# (*jmptab[index])()

	/*
	 * Empty-flag .note.GNU-stack section (conventionally marks the
	 * object as not requiring an executable stack).
	 */
	.section .note.GNU-stack,"",%progbits
