1 /*	$OpenBSD: rtld_machine.c,v 1.58 2016/06/21 15:25:38 deraadt Exp $ */
2 
3 /*
4  * Copyright (c) 1999 Dale Rahn
5  * Copyright (c) 2001 Niklas Hallqvist
6  * Copyright (c) 2001 Artur Grabowski
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*-
30  * Copyright (c) 2000 Eduardo Horvath.
31  * Copyright (c) 1999 The NetBSD Foundation, Inc.
32  * All rights reserved.
33  *
34  * This code is derived from software contributed to The NetBSD Foundation
35  * by Paul Kranenburg.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the NetBSD
48  *	Foundation, Inc. and its contributors.
49  * 4. Neither the name of The NetBSD Foundation nor the names of its
50  *    contributors may be used to endorse or promote products derived
51  *    from this software without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #define _DYN_LOADER
67 
68 #include <sys/types.h>
69 #include <sys/mman.h>
70 #include <sys/syscall.h>
71 #include <sys/unistd.h>
72 #include <machine/trap.h>
73 
74 #include <nlist.h>
75 #include <link.h>
76 
77 #include "syscall.h"
78 #include "archdep.h"
79 #include "resolve.h"
80 
/*
 * Cookie that _dl_bind() passes as the third argument of its inline
 * kbind system call.  The object is emitted into the
 * ".openbsd.randomdata" section; nothing in this file assigns it
 * (presumably it is filled in before ld.so runs -- confirm).
 */
int64_t pcookie __attribute__((section(".openbsd.randomdata"))) __dso_hidden;
82 
/*
 * The following table holds for each relocation type:
 *	- the width in bits of the memory location the relocation
 *	  applies to (used by _dl_md_reloc() to choose between a 32-bit
 *	  and a 64-bit store and to size byte-wise unaligned updates)
 *	- the number of bits the relocation value must be shifted to the
 *	  right (i.e. discard least significant bits) to fit into
 *	  the appropriate field in the instruction word.
 *	- flags indicating whether
 *		* the relocation involves a symbol
 *		* the relocation uses the addend
 *		* the relocation is relative to the current position
 *		* the relocation is for a GOT entry
 *		* the relocation is relative to the load address
 *		* the relocation target may be unaligned
 */
/*
 * Flag bits and packed fields of one reloc_target_flags[] entry.
 * Bits 31..26 are boolean properties, bits 15..8 hold the target width
 * in bits and bits 7..0 hold the right-shift count.
 */
#define _RF_S		0x80000000		/* Resolve symbol */
#define _RF_A		0x40000000		/* Use addend */
#define _RF_P		0x20000000		/* Location relative */
#define _RF_G		0x10000000		/* GOT offset */
#define _RF_B		0x08000000		/* Load address relative */
#define _RF_U		0x04000000		/* Unaligned */
#define _RF_SZ(s)	(((s) & 0xff) << 8)	/* memory target size */
#define _RF_RS(s)	((s) & 0xff)		/* right shift */

/*
 * Indexed by relocation type.  The element type is unsigned because
 * _RF_S occupies bit 31: storing it in a signed int relies on an
 * implementation-defined conversion and makes the entry negative.
 * The table is never written, hence const.
 */
static const unsigned int reloc_target_flags[] = {
	0,							/* NONE */
	_RF_S|_RF_A|		_RF_SZ(8)  | _RF_RS(0),		/* RELOC_8 */
	_RF_S|_RF_A|		_RF_SZ(16) | _RF_RS(0),		/* RELOC_16 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* RELOC_32 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(8)  | _RF_RS(0),		/* DISP_8 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(16) | _RF_RS(0),		/* DISP_16 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* DISP_32 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP_30 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP_22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* HI22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 13 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LO10 */
	_RF_G|			_RF_SZ(32) | _RF_RS(0),		/* GOT10 */
	_RF_G|			_RF_SZ(32) | _RF_RS(0),		/* GOT13 */
	_RF_G|			_RF_SZ(32) | _RF_RS(10),	/* GOT22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PC10 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PC22 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WPLT30 */
	_RF_S|			_RF_SZ(32) | _RF_RS(0),		/* COPY */
	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* GLOB_DAT */
	_RF_S|			_RF_SZ(32) | _RF_RS(0),		/* JMP_SLOT */
	      _RF_A|	_RF_B|	_RF_SZ(64) | _RF_RS(0),		/* RELATIVE */
	_RF_S|_RF_A|	_RF_U|	_RF_SZ(32) | _RF_RS(0),		/* UA_32 */

	      _RF_A|		_RF_SZ(32) | _RF_RS(0),		/* PLT32 */
	      _RF_A|		_RF_SZ(32) | _RF_RS(10),	/* HIPLT22 */
	      _RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LOPLT10 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PCPLT32 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PCPLT22 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PCPLT10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 11 */
	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* 64 */
	_RF_S|_RF_A|/*extra*/	_RF_SZ(32) | _RF_RS(0),		/* OLO10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(42),	/* HH22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(32),	/* HM10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* LM22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(42),	/* PC_HH22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(32),	/* PC_HM10 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PC_LM22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP16 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP19 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* GLOB_JMP */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 7 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 5 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 6 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(64) | _RF_RS(0),		/* DISP64 */
	      _RF_A|		_RF_SZ(64) | _RF_RS(0),		/* PLT64 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* HIX22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LOX10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(22),	/* H44 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(12),	/* M44 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* L44 */
	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* REGISTER */
	_RF_S|_RF_A|	_RF_U|	_RF_SZ(64) | _RF_RS(0),		/* UA64 */
	_RF_S|_RF_A|	_RF_U|	_RF_SZ(16) | _RF_RS(0),		/* UA16 */
};

/* Accessors; `t' is a relocation type used as an index into the table. */
#define RELOC_RESOLVE_SYMBOL(t)		((reloc_target_flags[t] & _RF_S) != 0)
#define RELOC_PC_RELATIVE(t)		((reloc_target_flags[t] & _RF_P) != 0)
#define RELOC_BASE_RELATIVE(t)		((reloc_target_flags[t] & _RF_B) != 0)
#define RELOC_UNALIGNED(t)		((reloc_target_flags[t] & _RF_U) != 0)
#define RELOC_USE_ADDEND(t)		((reloc_target_flags[t] & _RF_A) != 0)
#define RELOC_TARGET_SIZE(t)		((reloc_target_flags[t] >> 8) & 0xff)
#define RELOC_VALUE_RIGHTSHIFT(t)	(reloc_target_flags[t] & 0xff)
172 
/*
 * For each relocation type, the mask of bits in the target location
 * that the (already right-shifted) relocation value replaces.  The
 * entries are in the same order as reloc_target_flags[]; -1 means the
 * whole 64-bit word.
 *
 * Every row must end in a comma: without one after _BM(6) the next
 * row's leading -1 is parsed as a subtraction, which silently shifts
 * all following masks down by one slot and shortens the table.
 */
static const long reloc_target_bitmask[] = {
#define _BM(x)	(~(-(1ULL << (x))))
	0,				/* NONE */
	_BM(8), _BM(16), _BM(32),	/* RELOC_8, _16, _32 */
	_BM(8), _BM(16), _BM(32),	/* DISP8, DISP16, DISP32 */
	_BM(30), _BM(22),		/* WDISP30, WDISP22 */
	_BM(22), _BM(22),		/* HI22, _22 */
	_BM(13), _BM(10),		/* RELOC_13, _LO10 */
	_BM(10), _BM(13), _BM(22),	/* GOT10, GOT13, GOT22 */
	_BM(10), _BM(22),		/* _PC10, _PC22 */
	_BM(30), 0,			/* _WPLT30, _COPY */
	-1, _BM(32), -1,		/* _GLOB_DAT, JMP_SLOT, _RELATIVE */
	_BM(32), _BM(32),		/* _UA32, PLT32 */
	_BM(22), _BM(10),		/* _HIPLT22, LOPLT10 */
	_BM(32), _BM(22), _BM(10),	/* _PCPLT32, _PCPLT22, _PCPLT10 */
	_BM(10), _BM(11), -1,		/* _10, _11, _64 */
	_BM(10), _BM(22),		/* _OLO10, _HH22 */
	_BM(10), _BM(22),		/* _HM10, _LM22 */
	_BM(22), _BM(10), _BM(22),	/* _PC_HH22, _PC_HM10, _PC_LM22 */
	_BM(16), _BM(19),		/* _WDISP16, _WDISP19 */
	-1,				/* GLOB_JMP */
	_BM(7), _BM(5), _BM(6),		/* _7, _5, _6 */
	-1, -1,				/* DISP64, PLT64 */
	_BM(22), _BM(13),		/* HIX22, LOX10 */
	_BM(22), _BM(10), _BM(13),	/* H44, M44, L44 */
	-1, -1, _BM(16),		/* REGISTER, UA64, UA16 */
#undef _BM
};
#define RELOC_VALUE_BITMASK(t)	(reloc_target_bitmask[t])
202 
/*
 * Forward declarations for PLT helpers defined further down in this
 * file.
 */

/*
 * Encode the instruction words that make the PLT entry at `pltaddr'
 * transfer to `value'; the words are stored through `where1' and
 * `where2' and the number of words placed in `where2' is returned.
 */
int _dl_reloc_plt(Elf_Word *where1, Elf_Word *where2, Elf_Word *pltaddr,
	Elf_Addr value);
/* Write an eight-word stub that jumps to `proc' starting at `pltgot'. */
void _dl_install_plt(Elf_Word *pltgot, Elf_Addr proc);
206 
207 int
208 _dl_md_reloc(elf_object_t *object, int rel, int relasz)
209 {
210 	long	i;
211 	long	numrela;
212 	long	relrel;
213 	int	fails = 0;
214 	Elf_Addr loff;
215 	Elf_Addr prev_value = 0;
216 	const Elf_Sym *prev_sym = NULL;
217 	Elf_RelA *relas;
218 	struct load_list *llist;
219 
220 	loff = object->obj_base;
221 	numrela = object->Dyn.info[relasz] / sizeof(Elf64_Rela);
222 	relrel = rel == DT_RELA ? object->relacount : 0;
223 	relas = (Elf64_Rela *)(object->Dyn.info[rel]);
224 
225 	if (relas == NULL)
226 		return(0);
227 
228 	if (relrel > numrela) {
229 		_dl_printf("relacount > numrel: %ld > %ld\n", relrel, numrela);
230 		_dl_exit(20);
231 	}
232 
233 	/*
234 	 * unprotect some segments if we need it.
235 	 */
236 	if ((object->dyn.textrel == 1) && (rel == DT_REL || rel == DT_RELA)) {
237 		for (llist = object->load_list; llist != NULL; llist = llist->next) {
238 			if (!(llist->prot & PROT_WRITE))
239 				_dl_mprotect(llist->start, llist->size,
240 				    PROT_READ | PROT_WRITE);
241 		}
242 	}
243 
244 	/* tight loop for leading RELATIVE relocs */
245 	for (i = 0; i < relrel; i++, relas++) {
246 		Elf_Addr *where;
247 
248 #ifdef DEBUG
249 		if (ELF_R_TYPE(relas->r_info) != R_TYPE(RELATIVE)) {
250 			_dl_printf("RELACOUNT wrong\n");
251 			_dl_exit(20);
252 		}
253 #endif
254 		where = (Elf_Addr *)(relas->r_offset + loff);
255 		*where = relas->r_addend + loff;
256 	}
257 	for (; i < numrela; i++, relas++) {
258 		Elf_Addr *where, value, ooff, mask;
259 		Elf_Word type;
260 		const Elf_Sym *sym, *this;
261 		const char *symn;
262 
263 		type = ELF_R_TYPE(relas->r_info);
264 
265 		if (type == R_TYPE(NONE) || type == R_TYPE(JMP_SLOT))
266 			continue;
267 
268 		where = (Elf_Addr *)(relas->r_offset + loff);
269 
270 		if (RELOC_USE_ADDEND(type))
271 			value = relas->r_addend;
272 		else
273 			value = 0;
274 
275 		sym = NULL;
276 		symn = NULL;
277 		if (RELOC_RESOLVE_SYMBOL(type)) {
278 			sym = object->dyn.symtab;
279 			sym += ELF_R_SYM(relas->r_info);
280 			symn = object->dyn.strtab + sym->st_name;
281 
282 			if (sym->st_shndx != SHN_UNDEF &&
283 			    ELF_ST_BIND(sym->st_info) == STB_LOCAL) {
284 				value += loff;
285 			} else if (sym == prev_sym) {
286 				value += prev_value;
287 			} else {
288 				this = NULL;
289 				ooff = _dl_find_symbol_bysym(object,
290 				    ELF_R_SYM(relas->r_info), &this,
291 				    SYM_SEARCH_ALL|SYM_WARNNOTFOUND|
292 				    ((type == R_TYPE(JMP_SLOT)) ?
293 					SYM_PLT : SYM_NOTPLT),
294 				    sym, NULL);
295 				if (this == NULL) {
296 resolve_failed:
297 					if (ELF_ST_BIND(sym->st_info) !=
298 					    STB_WEAK)
299 						fails++;
300 					continue;
301 				}
302 				prev_sym = sym;
303 				prev_value = (Elf_Addr)(ooff + this->st_value);
304 				value += prev_value;
305 			}
306 		}
307 
308 		if (type == R_TYPE(COPY)) {
309 			void *dstaddr = where;
310 			const void *srcaddr;
311 			const Elf_Sym *dstsym = sym, *srcsym = NULL;
312 			size_t size = dstsym->st_size;
313 			Elf_Addr soff;
314 
315 			soff = _dl_find_symbol(symn, &srcsym,
316 			    SYM_SEARCH_OTHER|SYM_WARNNOTFOUND|SYM_NOTPLT,
317 			    dstsym, object, NULL);
318 			if (srcsym == NULL)
319 				goto resolve_failed;
320 
321 			srcaddr = (void *)(soff + srcsym->st_value);
322 			_dl_bcopy(srcaddr, dstaddr, size);
323 			continue;
324 		}
325 
326 		if (RELOC_PC_RELATIVE(type))
327 			value -= (Elf_Addr)where;
328 		if (RELOC_BASE_RELATIVE(type))
329 			value += loff;
330 
331 		mask = RELOC_VALUE_BITMASK(type);
332 		value >>= RELOC_VALUE_RIGHTSHIFT(type);
333 		value &= mask;
334 
335 		if (RELOC_UNALIGNED(type)) {
336 			/* Handle unaligned relocations. */
337 			Elf_Addr tmp = 0;
338 			char *ptr = (char *)where;
339 			int i, size = RELOC_TARGET_SIZE(type)/8;
340 
341 			/* Read it in one byte at a time. */
342 			for (i=0; i<size; i++)
343 				tmp = (tmp << 8) | ptr[i];
344 
345 			tmp &= ~mask;
346 			tmp |= value;
347 
348 			/* Write it back out. */
349 			for (i=0; i<size; i++)
350 				ptr[i] = ((tmp >> (8*i)) & 0xff);
351 		} else if (RELOC_TARGET_SIZE(type) > 32) {
352 			*where &= ~mask;
353 			*where |= value;
354 		} else {
355 			Elf32_Addr *where32 = (Elf32_Addr *)where;
356 
357 			*where32 &= ~mask;
358 			*where32 |= value;
359 		}
360 	}
361 
362 	/* reprotect the unprotected segments */
363 	if ((object->dyn.textrel == 1) && (rel == DT_REL || rel == DT_RELA)) {
364 		for (llist = object->load_list; llist != NULL; llist = llist->next) {
365 			if (!(llist->prot & PROT_WRITE))
366 				_dl_mprotect(llist->start, llist->size,
367 				    llist->prot);
368 		}
369 	}
370 
371 	return (fails);
372 }
373 
/*
 * Instruction templates:
 *
 * Each constant is a 32-bit instruction word with its immediate or
 * displacement field left as zero; _dl_reloc_plt() ORs the field value
 * in.
 */

#define	BAA	0x30680000	/*	ba,a	%xcc, 0 */
#define	SETHI	0x03000000	/*	sethi	%hi(0), %g1 */
#define	JMP	0x81c06000	/*	jmpl	%g1+%lo(0), %g0	  <-- simm13 */
#define	NOP	0x01000000	/*	sethi	%hi(0), %g0 */
#define	OR	0x82106000	/*	or	%g1, 0, %g1 */
#define	ORG5	0x8a116000	/*	or	%g5, 0, %g5 */
#define	XOR	0x82186000	/*	xor	%g1, 0, %g1 */
#define	MOV71	0x8210000f	/*	or	%o7, 0, %g1 */
#define	MOV17	0x9e100001	/*	or	%g1, 0, %o7 */
#define	CALL	0x40000000	/*	call	0	  <-- disp30 */
#define	SLLX	0x83287000	/*	sllx	%g1, 0, %g1 */
#define	SLLXG5	0x8b297000	/*	sllx	%g5, 0, %g5 */
#define	SRAX	0x83387000	/*	srax	%g1, 0, %g1 */
#define	SETHIG5	0x0b000000	/*	sethi	%hi(0), %g5 */
/*
 * NOTE(review): 0x82804005 appears to decode with op3 = 0x10, i.e.
 * "addcc %g1, %g5, %g1", not "or" (which would be 0x82104005).  In the
 * only sequence that uses it the two registers hold disjoint bit
 * ranges, so the sum equals the bitwise or -- confirm before changing.
 */
#define	ORG15	0x82804005	/*	or	%g1, %g5, %g1 */


/* %hi(v) with variable shift: the 22 bits of v starting at bit s */
#define	HIVAL(v, s)	(((v) >> (s)) &  0x003fffff)
/* %lo(v): the low 10 bits of v */
#define LOVAL(v)	((v) & 0x000003ff)
398 
399 int
400 _dl_reloc_plt(Elf_Word *where1, Elf_Word *where2, Elf_Word *pltaddr,
401     Elf_Addr value)
402 {
403 	Elf_Addr offset;
404 
405 	/*
406 	 * At the PLT entry pointed at by `where', we now construct
407 	 * a direct transfer to the now fully resolved function
408 	 * address.
409 	 *
410 	 * A PLT entry is supposed to start by looking like this:
411 	 *
412 	 *	sethi	%hi(. - .PLT0), %g1
413 	 *	ba,a,pt	%xcc, .PLT1
414 	 *	nop
415 	 *	nop
416 	 *	nop
417 	 *	nop
418 	 *	nop
419 	 *	nop
420 	 *
421 	 * When we replace these entries we either (a) only replace
422 	 * the second word (the ba,a,pt), or (b) replace multiple
423 	 * words: one or more nops, then finally the ba,a,pt.  By
424 	 * replacing the ba,a,pt last, we guarantee that the PLT can
425 	 * be used by other threads even while it's being updated.
426 	 * This is made slightly more complicated by kbind, for which
427 	 * we need to pass them to the kernel in the order they get
428 	 * written.  To that end, we store the word to overwrite the
429 	 * ba,a,pt at *where1, and the words to overwrite the nops at
430 	 * where2[0], where2[1], ...
431 	 *
432 	 * We now need to find out how far we need to jump.  We
433 	 * have a choice of several different relocation techniques
434 	 * which are increasingly expensive.
435 	 */
436 
437 	offset = value - ((Elf_Addr)pltaddr);
438 	if ((int64_t)(offset-4) <= (1L<<20) &&
439 	    (int64_t)(offset-4) >= -(1L<<20)) {
440 		/*
441 		 * We're within 1MB -- we can use a direct branch insn.
442 		 *
443 		 * We can generate this pattern:
444 		 *
445 		 *	sethi	%hi(. - .PLT0), %g1
446 		 *	ba,a,pt	%xcc, addr
447 		 *	nop
448 		 *	nop
449 		 *	nop
450 		 *	nop
451 		 *	nop
452 		 *	nop
453 		 *
454 		 */
455 		*where1 = BAA | (((offset-4) >> 2) &0x7ffff);
456 		return (0);
457 	} else if (value < (1UL<<32)) {
458 		/*
459 		 * We're within 32-bits of address zero.
460 		 *
461 		 * The resulting code in the jump slot is:
462 		 *
463 		 *	sethi	%hi(. - .PLT0), %g1
464 		 *	sethi	%hi(addr), %g1
465 		 *	jmp	%g1+%lo(addr)
466 		 *	nop
467 		 *	nop
468 		 *	nop
469 		 *	nop
470 		 *	nop
471 		 *
472 		 */
473 		*where1 = SETHI | HIVAL(value, 10);
474 		where2[0] = JMP   | LOVAL(value);
475 		return (1);
476 	} else if (value > -(1UL<<32)) {
477 		/*
478 		 * We're within 32-bits of address -1.
479 		 *
480 		 * The resulting code in the jump slot is:
481 		 *
482 		 *	sethi	%hi(. - .PLT0), %g1
483 		 *	sethi	%hix(~addr), %g1
484 		 *	xor	%g1, %lox(~addr), %g1
485 		 *	jmp	%g1
486 		 *	nop
487 		 *	nop
488 		 *	nop
489 		 *	nop
490 		 *
491 		 */
492 		*where1 = SETHI | HIVAL(~value, 10);
493 		where2[0] = XOR | ((~value) & 0x00001fff);
494 		where2[1] = JMP;
495 		return (2);
496 	} else if ((int64_t)(offset-8) <= (1L<<31) &&
497 	    (int64_t)(offset-8) >= -((1L<<31) - 4)) {
498 		/*
499 		 * We're within 32-bits -- we can use a direct call insn
500 		 *
501 		 * The resulting code in the jump slot is:
502 		 *
503 		 *	sethi	%hi(. - .PLT0), %g1
504 		 *	mov	%o7, %g1
505 		 *	call	(.+offset)
506 		 *	 mov	%g1, %o7
507 		 *	nop
508 		 *	nop
509 		 *	nop
510 		 *	nop
511 		 *
512 		 */
513 		*where1 = MOV71;
514 		where2[0] = CALL | (((offset-8) >> 2) & 0x3fffffff);
515 		where2[1] = MOV17;
516 		return (2);
517 	} else if (value < (1L<<42)) {
518 		/*
519 		 * Target 42bits or smaller.
520 		 * We can generate this pattern:
521 		 *
522 		 * The resulting code in the jump slot is:
523 		 *
524 		 *	sethi	%hi(. - .PLT0), %g1
525 		 *	sethi	%hi(addr >> 20), %g1
526 		 *	or	%g1, %lo(addr >> 10), %g1
527 		 *	sllx	%g1, 10, %g1
528 		 *	jmp	%g1+%lo(addr)
529 		 *	nop
530 		 *	nop
531 		 *	nop
532 		 *
533 		 * this can handle addresses 0 - 0x3fffffffffc
534 		 */
535 		*where1 = SETHI | HIVAL(value, 20);
536 		where2[0] = OR    | LOVAL(value >> 10);
537 		where2[1] = SLLX  | 10;
538 		where2[2] = JMP   | LOVAL(value);
539 		return (3);
540 	} else if (value > -(1UL<<41)) {
541 		/*
542 		 * Large target >= 0xfffffe0000000000UL
543 		 * We can generate this pattern:
544 		 *
545 		 * The resulting code in the jump slot is:
546 		 *
547 		 *	sethi	%hi(. - .PLT0), %g1
548 		 *	sethi	%hi(addr >> 20), %g1
549 		 *	or	%g1, %lo(addr >> 10), %g1
550 		 *	sllx	%g1, 32, %g1
551 		 *	srax	%g1, 22, %g1
552 		 *	jmp	%g1+%lo(addr)
553 		 *	nop
554 		 *	nop
555 		 *	nop
556 		 *
557 		 */
558 		*where1 = SETHI | HIVAL(value, 20);
559 		where2[0] = OR   | LOVAL(value >> 10);
560 		where2[1] = SLLX  | 32;
561 		where2[2] = SRAX  | 22;
562 		where2[3] = JMP   | LOVAL(value);
563 		return (4);
564 	} else {
565 		/*
566 		 * We need to load all 64-bits
567 		 *
568 		 * The resulting code in the jump slot is:
569 		 *
570 		 *	sethi	%hi(. - .PLT0), %g1
571 		 *	sethi	%hi(addr >> 42), %g5
572 		 *	sethi	%hi(addr >> 10), %g1
573 		 *	or	%g1, %lo(addr >> 32), %g5
574 		 *	sllx	%g5, 32, %g5
575 		 *	or	%g1, %g5, %g1
576 		 *	jmp	%g1+%lo(addr)
577 		 *	nop
578 		 *
579 		 */
580 		*where1 = SETHIG5 | HIVAL(value, 42);
581 		where2[0] = SETHI | HIVAL(value, 10);
582 		where2[1] = ORG5 | LOVAL(value >> 32);
583 		where2[2] = SLLXG5 | 32;
584 		where2[3] = ORG15;
585 		where2[4] = JMP | LOVAL(value);
586 		return (5);
587 	}
588 }
589 
590 /*
591  * Resolve a symbol at run-time.
592  */
593 Elf_Addr
594 _dl_bind(elf_object_t *object, int index)
595 {
596 	Elf_RelA *rela;
597 	Elf_Word *addr;
598 	Elf_Addr ooff, newvalue;
599 	const Elf_Sym *sym, *this;
600 	const char *symn;
601 	const elf_object_t *sobj;
602 	int64_t cookie = pcookie;
603 	struct {
604 		struct __kbind param[2];
605 		Elf_Word newval[6];
606 	} buf;
607 	struct __kbind *param;
608 	size_t psize;
609 	int i;
610 
611 	rela = (Elf_RelA *)(object->Dyn.info[DT_JMPREL]);
612 	if (ELF_R_TYPE(rela->r_info) == R_TYPE(JMP_SLOT)) {
613 		/*
614 		 * XXXX
615 		 *
616 		 * The first four PLT entries are reserved.  There
617 		 * is some disagreement whether they should have
618 		 * associated relocation entries.  Both the SPARC
619 		 * 32-bit and 64-bit ELF specifications say that
620 		 * they should have relocation entries, but the
621 		 * 32-bit SPARC binutils do not generate them,
622 		 * and now the 64-bit SPARC binutils have stopped
623 		 * generating them too.
624 		 *
625 		 * So, to provide binary compatibility, we will
626 		 * check the first entry, if it is reserved it
627 		 * should not be of the type JMP_SLOT.  If it
628 		 * is JMP_SLOT, then the 4 reserved entries were
629 		 * not generated and our index is 4 entries too far.
630 		 */
631 		rela += index - 4;
632 	} else
633 		rela += index;
634 
635 	sym = object->dyn.symtab;
636 	sym += ELF64_R_SYM(rela->r_info);
637 	symn = object->dyn.strtab + sym->st_name;
638 
639 	this = NULL;
640 	ooff = _dl_find_symbol(symn, &this,
641 	    SYM_SEARCH_ALL|SYM_WARNNOTFOUND|SYM_PLT, sym, object, &sobj);
642 	if (this == NULL) {
643 		_dl_printf("lazy binding failed!\n");
644 		*(volatile int *)0 = 0;		/* XXX */
645 	}
646 
647 	newvalue = ooff + this->st_value;
648 
649 	if (__predict_false(sobj->traced) && _dl_trace_plt(sobj, symn))
650 		return (newvalue);
651 
652 	/*
653 	 * While some relocations just need to write one word and
654 	 * can do that with kbind() with just one block, many
655 	 * require two blocks to be written: all but first word,
656 	 * then the first word.  So, if we want to write 5 words
657 	 * in total, then the layout of the buffer we pass to
658 	 * kbind() needs to be one of these:
659 	 *   +------------+
660 	 *   | kbind.addr |
661 	 *   |     """    |
662 	 *   | kbind.size |
663 	 *   |     """    |		+------------+
664 	 *   | kbind.addr |		| kbind.addr |
665 	 *   |     """    |		|     """    |
666 	 *   | kbind.size |		| kbind.size |
667 	 *   |     """    |		|     """    |
668 	 *   |   word 2   |		|    word    |
669 	 *   |   word 3   |		+------------+
670 	 *   |   word 4   |
671 	 *   |   word 5   |
672 	 *   |   word 1   |
673 	 *   +------------+
674 	 *
675 	 * We first handle the special case of relocations with a
676 	 * non-zero r_addend, which have one block to update whose
677 	 * address is the relocation address itself.  This is only
678 	 * used for PLT entries after the 2^15th, i.e., truly monstrous
679 	 * programs, thus the __predict_false().
680 	 */
681 	addr = (Elf_Word *)(object->obj_base + rela->r_offset);
682 	_dl_memset(&buf, 0, sizeof(buf));
683 	if (__predict_false(rela->r_addend)) {
684 		/*
685 		 * This entry is >32768.  The relocation points to a
686 		 * PC-relative pointer to the _dl_bind_start_0 stub at
687 		 * the top of the PLT section.  Update it to point to
688 		 * the target function.
689 		 */
690 		buf.newval[0] = rela->r_addend + newvalue
691 		    - object->Dyn.info[DT_PLTGOT];
692 		buf.param[1].kb_addr = addr;
693 		buf.param[1].kb_size = sizeof(buf.newval[0]);
694 		param = &buf.param[1];
695 		psize = sizeof(struct __kbind) + sizeof(buf.newval[0]);
696 	} else {
697 		Elf_Word first;
698 
699 		/*
700 		 * For the other relocations, the word at the relocation
701 		 * address will be left unchanged.  Assume _dl_reloc_plt()
702 		 * will tell us to update multiple words, so save the first
703 		 * word to the side.
704 		 */
705 		i = _dl_reloc_plt(&first, &buf.newval[0], addr, newvalue);
706 
707 		/*
708 		 * _dl_reloc_plt() returns the number of words that must be
709 		 * written after the first word in location, but before it
710 		 * in time.  If it returns zero, then only a single block
711 		 * with one word is needed, so we just put it in place per
712 		 * the right-hand diagram and just use param[1] and newval[0]
713 		 */
714 		if (i == 0) {
715 			/* fill in the __kbind structure */
716 			buf.param[1].kb_addr = &addr[1];
717 			buf.param[1].kb_size = sizeof(Elf_Word);
718 			buf.newval[0] = first;
719 			param = &buf.param[1];
720 			psize = sizeof(struct __kbind) + sizeof(buf.newval[0]);
721 		} else {
722 			/*
723 			 * Two blocks are necessary.  Save the first word
724 			 * after the other words.
725 			 */
726 			buf.param[0].kb_addr = &addr[2];
727 			buf.param[0].kb_size = i * sizeof(Elf_Word);
728 			buf.param[1].kb_addr = &addr[1];
729 			buf.param[1].kb_size = sizeof(Elf_Word);
730 			buf.newval[i] = first;
731 			param = &buf.param[0];
732 			psize = 2 * sizeof(struct __kbind) +
733 			    (i + 1) * sizeof(buf.newval[0]);
734 		}
735 	}
736 
737 	/* directly code the syscall, so that it's actually inline here */
738 	{
739 		register long syscall_num __asm("g1") = SYS_kbind;
740 		register void *arg1 __asm("o0") = param;
741 		register long  arg2 __asm("o1") = psize;
742 		register long  arg3 __asm("o2") = cookie;
743 
744 		__asm volatile("t %2" : "+r" (arg1), "+r" (arg2)
745 		    : "i" (ST_SYSCALL), "r" (syscall_num), "r" (arg3)
746 		    : "cc", "memory");
747 	}
748 
749 	return (newvalue);
750 }
751 
752 /*
753  * Install rtld function call into this PLT slot.
754  */
755 #define SAVE		0x9de3bf50
756 #define SETHI_l0	0x21000000
757 #define SETHI_l1	0x23000000
758 #define OR_l0_l0	0xa0142000
759 #define SLLX_l0_32_l0	0xa12c3020
760 #define OR_l0_l1_l0	0xa0140011
761 #define JMPL_l0_o1	0x93c42000
762 #define MOV_g1_o0	0x90100001
763 
764 void
765 _dl_install_plt(Elf_Word *pltgot, Elf_Addr proc)
766 {
767 	pltgot[0] = SAVE;
768 	pltgot[1] = SETHI_l0  | HIVAL(proc, 42);
769 	pltgot[2] = SETHI_l1  | HIVAL(proc, 10);
770 	pltgot[3] = OR_l0_l0  | LOVAL((proc) >> 32);
771 	pltgot[4] = SLLX_l0_32_l0;
772 	pltgot[5] = OR_l0_l1_l0;
773 	pltgot[6] = JMPL_l0_o1 | LOVAL(proc);
774 	pltgot[7] = MOV_g1_o0;
775 }
776 
/*
 * Binding entry points defined outside this file.  _dl_md_reloc_got()
 * only takes their addresses and passes them to _dl_install_plt().
 */
void _dl_bind_start_0(long, long);
void _dl_bind_start_1(long, long);
779 
780 static int
781 _dl_md_reloc_all_plt(elf_object_t *object)
782 {
783 	long	i;
784 	long	numrela;
785 	int	fails = 0;
786 	Elf_Addr loff;
787 	Elf_RelA *relas;
788 
789 	loff = object->obj_base;
790 	numrela = object->Dyn.info[DT_PLTRELSZ] / sizeof(Elf64_Rela);
791 	relas = (Elf64_Rela *)(object->Dyn.info[DT_JMPREL]);
792 
793 	if (relas == NULL)
794 		return(0);
795 
796 	for (i = 0; i < numrela; i++, relas++) {
797 		Elf_Addr value;
798 		Elf_Word *where;
799 		const Elf_Sym *sym, *this;
800 
801 		if (ELF_R_TYPE(relas->r_info) != R_TYPE(JMP_SLOT))
802 			continue;
803 
804 		sym = object->dyn.symtab + ELF_R_SYM(relas->r_info);
805 
806 		this = NULL;
807 		value = _dl_find_symbol_bysym(object, ELF_R_SYM(relas->r_info),
808 		    &this, SYM_SEARCH_ALL|SYM_WARNNOTFOUND|SYM_PLT, sym, NULL);
809 		if (this == NULL) {
810 			if (ELF_ST_BIND(sym->st_info) != STB_WEAK)
811 				fails++;
812 			continue;
813 		}
814 
815 		where = (Elf_Word *)(relas->r_offset + loff);
816 		value += this->st_value;
817 
818 		if (__predict_false(relas->r_addend)) {
819 			/*
820 			 * This entry is >32768.  The relocation points to a
821 			 * PC-relative pointer to the _dl_bind_start_0 stub at
822 			 * the top of the PLT section.  Update it to point to
823 			 * the target function.
824 			 */
825 			*(Elf_Addr *)where = relas->r_addend + value -
826 			    object->Dyn.info[DT_PLTGOT];
827 		} else
828 			_dl_reloc_plt(&where[1], &where[2], where, value);
829 	}
830 
831 	return (fails);
832 }
833 
834 /*
835  *	Relocate the Global Offset Table (GOT).
836  */
837 int
838 _dl_md_reloc_got(elf_object_t *object, int lazy)
839 {
840 	int	fails = 0;
841 	Elf_Addr *pltgot = (Elf_Addr *)object->Dyn.info[DT_PLTGOT];
842 	Elf_Word *entry = (Elf_Word *)pltgot;
843 
844 	if (object->Dyn.info[DT_PLTREL] != DT_RELA)
845 		return (0);
846 
847 	if (object->traced)
848 		lazy = 1;
849 
850 	/* temporarily make the PLT writable */
851 	_dl_protect_segment(object, 0, "__plt_start", "__plt_end",
852 	    PROT_READ|PROT_WRITE);
853 
854 	if (!lazy) {
855 		fails = _dl_md_reloc_all_plt(object);
856 	} else {
857 		_dl_install_plt(&entry[0], (Elf_Addr)&_dl_bind_start_0);
858 		_dl_install_plt(&entry[8], (Elf_Addr)&_dl_bind_start_1);
859 
860 		pltgot[8] = (Elf_Addr)object;
861 	}
862 
863 	/* mprotect the GOT */
864 	_dl_protect_segment(object, 0, "__got_start", "__got_end", PROT_READ);
865 
866 	/* mprotect the PLT */
867 	_dl_protect_segment(object, 0, "__plt_start", "__plt_end",
868 	    PROT_READ|PROT_EXEC);
869 
870 	return (fails);
871 }
872