xref: /freebsd/sys/kern/link_elf_obj.c (revision 2f513db7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998-2000 Doug Rabson
5  * Copyright (c) 2004 Peter Wemm
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_ddb.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/namei.h>
44 #include <sys/fcntl.h>
45 #include <sys/vnode.h>
46 #include <sys/linker.h>
47 
48 #include <machine/elf.h>
49 
50 #include <net/vnet.h>
51 
52 #include <security/mac/mac_framework.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_kern.h>
58 #include <vm/vm_extern.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_map.h>
61 
62 #include <sys/link_elf.h>
63 
64 #ifdef DDB_CTF
65 #include <contrib/zlib/zlib.h>
66 #endif
67 
68 #include "linker_if.h"
69 
/*
 * Describes one loaded program section (SHT_PROGBITS, SHT_NOBITS and, on
 * amd64, SHT_X86_64_UNWIND).  The entries live in elf_file.progtab.
 */
typedef struct {
	void		*addr;	/* Where the section's data resides in memory. */
	Elf_Off		size;	/* Section size in bytes (copied from sh_size). */
	int		flags;	/* Section flags. */
	int		sec;	/* Original section number. */
	char		*name;	/* Section name, or a "<<...>>" placeholder. */
} Elf_progent;
77 
/* Describes one SHT_REL relocation table; entries live in elf_file.reltab. */
typedef struct {
	Elf_Rel		*rel;	/* The relocation entries. */
	int		nrel;	/* Entry count: sh_size / sizeof(Elf_Rel). */
	int		sec;	/* sh_info: index of the section being relocated. */
} Elf_relent;
83 
/* Describes one SHT_RELA relocation table; entries live in elf_file.relatab. */
typedef struct {
	Elf_Rela	*rela;	/* The relocation entries (with addends). */
	int		nrela;	/* Entry count: sh_size / sizeof(Elf_Rela). */
	int		sec;	/* sh_info: index of the section being relocated. */
} Elf_relaent;
89 
/*
 * Linker state for one ELF relocatable-object module.  The generic
 * linker_file is embedded as the first member and the code in this file
 * casts a linker_file_t directly to an elf_file_t, so 'lf' must stay first.
 */
typedef struct elf_file {
	struct linker_file lf;		/* Common fields */

	int		preloaded;	/* Set by link_elf_link_preload() */
	caddr_t		address;	/* Relocation address */
	vm_object_t	object;		/* VM object to hold file pages */
	Elf_Shdr	*e_shdr;	/* Section header table */

	Elf_progent	*progtab;	/* Loaded program sections */
	u_int		nprogtab;	/* Number of progtab entries */

	Elf_relaent	*relatab;	/* SHT_RELA relocation tables */
	u_int		nrelatab;	/* Number of relatab entries */

	Elf_relent	*reltab;	/* SHT_REL relocation tables */
	int		nreltab;	/* Number of reltab entries */

	Elf_Sym		*ddbsymtab;	/* The symbol table we are using */
	long		ddbsymcnt;	/* Number of symbols */
	caddr_t		ddbstrtab;	/* String table */
	long		ddbstrcnt;	/* number of bytes in string table */

	caddr_t		shstrtab;	/* Section name string table */
	long		shstrcnt;	/* number of bytes in string table */

	caddr_t		ctftab;		/* CTF table */
	long		ctfcnt;		/* number of bytes in CTF table */
	caddr_t		ctfoff;		/* CTF offset table */
	caddr_t		typoff;		/* Type offset table */
	long		typlen;		/* Number of type entries. */

} *elf_file_t;
122 
123 #include <kern/kern_ctf.c>
124 
125 static int	link_elf_link_preload(linker_class_t cls,
126 		    const char *, linker_file_t *);
127 static int	link_elf_link_preload_finish(linker_file_t);
128 static int	link_elf_load_file(linker_class_t, const char *, linker_file_t *);
129 static int	link_elf_lookup_symbol(linker_file_t, const char *,
130 		    c_linker_sym_t *);
131 static int	link_elf_symbol_values(linker_file_t, c_linker_sym_t,
132 		    linker_symval_t *);
133 static int	link_elf_search_symbol(linker_file_t, caddr_t value,
134 		    c_linker_sym_t *sym, long *diffp);
135 
136 static void	link_elf_unload_file(linker_file_t);
137 static int	link_elf_lookup_set(linker_file_t, const char *,
138 		    void ***, void ***, int *);
139 static int	link_elf_each_function_name(linker_file_t,
140 		    int (*)(const char *, void *), void *);
141 static int	link_elf_each_function_nameval(linker_file_t,
142 				linker_function_nameval_callback_t,
143 				void *);
144 static int	link_elf_reloc_local(linker_file_t, bool);
145 static long	link_elf_symtab_get(linker_file_t, const Elf_Sym **);
146 static long	link_elf_strtab_get(linker_file_t, caddr_t *);
147 
148 static int	elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
149 		    Elf_Addr *);
150 
/*
 * kobj method table for this linker class: each entry binds a generic linker
 * interface method to the implementation used for ELF object files.
 * NOTE(review): link_elf_ctf_get is not declared in this part of the file;
 * presumably it is supplied by the kern_ctf.c include above -- confirm.
 */
static kobj_method_t link_elf_methods[] = {
	KOBJMETHOD(linker_lookup_symbol,	link_elf_lookup_symbol),
	KOBJMETHOD(linker_symbol_values,	link_elf_symbol_values),
	KOBJMETHOD(linker_search_symbol,	link_elf_search_symbol),
	KOBJMETHOD(linker_unload,		link_elf_unload_file),
	KOBJMETHOD(linker_load_file,		link_elf_load_file),
	KOBJMETHOD(linker_link_preload,		link_elf_link_preload),
	KOBJMETHOD(linker_link_preload_finish,	link_elf_link_preload_finish),
	KOBJMETHOD(linker_lookup_set,		link_elf_lookup_set),
	KOBJMETHOD(linker_each_function_name,	link_elf_each_function_name),
	KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval),
	KOBJMETHOD(linker_ctf_get,		link_elf_ctf_get),
	KOBJMETHOD(linker_symtab_get, 		link_elf_symtab_get),
	KOBJMETHOD(linker_strtab_get, 		link_elf_strtab_get),
	KOBJMETHOD_END
};
167 
/*
 * The linker class registered by link_elf_init().  Its name depends on the
 * target ELF class, its methods are link_elf_methods, and its third
 * initializer is sizeof(struct elf_file).
 * NOTE(review): presumably that size is what linker_make_file() allocates,
 * which is what makes the linker_file_t -> elf_file_t casts valid -- confirm.
 */
static struct linker_class link_elf_class = {
#if ELF_TARG_CLASS == ELFCLASS32
	"elf32_obj",
#else
	"elf64_obj",
#endif
	link_elf_methods, sizeof(struct elf_file)
};
176 
177 static int	relocate_file(elf_file_t ef);
178 static void	elf_obj_cleanup_globals_cache(elf_file_t);
179 
/*
 * Print a "kldload:" diagnostic.  When a file name is supplied it is placed
 * between the prefix and the message; otherwise only the message is shown.
 */
static void
link_elf_error(const char *filename, const char *s)
{
	if (filename != NULL) {
		printf("kldload: %s: %s\n", filename, s);
		return;
	}
	printf("kldload: %s\n", s);
}
188 
189 static void
190 link_elf_init(void *arg)
191 {
192 
193 	linker_add_class(&link_elf_class);
194 }
195 SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL);
196 
197 static void
198 link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end,
199     vm_prot_t prot)
200 {
201 	int error __unused;
202 
203 	KASSERT(start <= end && start >= (vm_offset_t)ef->address &&
204 	    end <= round_page((vm_offset_t)ef->address + ef->lf.size),
205 	    ("link_elf_protect_range: invalid range %#jx-%#jx",
206 	    (uintmax_t)start, (uintmax_t)end));
207 
208 	if (start == end)
209 		return;
210 	if (ef->preloaded) {
211 #ifdef __amd64__
212 		error = pmap_change_prot(start, end - start, prot);
213 		KASSERT(error == 0,
214 		    ("link_elf_protect_range: pmap_change_prot() returned %d",
215 		    error));
216 #endif
217 		return;
218 	}
219 	error = vm_map_protect(kernel_map, start, end, prot, FALSE);
220 	KASSERT(error == KERN_SUCCESS,
221 	    ("link_elf_protect_range: vm_map_protect() returned %d", error));
222 }
223 
224 /*
225  * Restrict permissions on linker file memory based on section flags.
226  * Sections need not be page-aligned, so overlap within a page is possible.
227  */
228 static void
229 link_elf_protect(elf_file_t ef)
230 {
231 	vm_offset_t end, segend, segstart, start;
232 	vm_prot_t gapprot, prot, segprot;
233 	int i;
234 
235 	/*
236 	 * If the file was preloaded, the last page may contain other preloaded
237 	 * data which may need to be writeable.  ELF files are always
238 	 * page-aligned, but other preloaded data, such as entropy or CPU
239 	 * microcode may be loaded with a smaller alignment.
240 	 */
241 	gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ;
242 
243 	start = end = (vm_offset_t)ef->address;
244 	prot = VM_PROT_READ;
245 	for (i = 0; i < ef->nprogtab; i++) {
246 		/*
247 		 * VNET and DPCPU sections have their memory allocated by their
248 		 * respective subsystems.
249 		 */
250 		if (ef->progtab[i].name != NULL && (
251 #ifdef VIMAGE
252 		    strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 ||
253 #endif
254 		    strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0))
255 			continue;
256 
257 		segstart = trunc_page((vm_offset_t)ef->progtab[i].addr);
258 		segend = round_page((vm_offset_t)ef->progtab[i].addr +
259 		    ef->progtab[i].size);
260 		segprot = VM_PROT_READ;
261 		if ((ef->progtab[i].flags & SHF_WRITE) != 0)
262 			segprot |= VM_PROT_WRITE;
263 		if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0)
264 			segprot |= VM_PROT_EXECUTE;
265 
266 		if (end <= segstart) {
267 			/*
268 			 * Case 1: there is no overlap between the previous
269 			 * segment and this one.  Apply protections to the
270 			 * previous segment, and protect the gap between the
271 			 * previous and current segments, if any.
272 			 */
273 			link_elf_protect_range(ef, start, end, prot);
274 			link_elf_protect_range(ef, end, segstart, gapprot);
275 
276 			start = segstart;
277 			end = segend;
278 			prot = segprot;
279 		} else if (start < segstart && end == segend) {
280 			/*
281 			 * Case 2: the current segment is a subrange of the
282 			 * previous segment.  Apply protections to the
283 			 * non-overlapping portion of the previous segment.
284 			 */
285 			link_elf_protect_range(ef, start, segstart, prot);
286 
287 			start = segstart;
288 			prot |= segprot;
289 		} else if (end < segend) {
290 			/*
291 			 * Case 3: there is partial overlap between the previous
292 			 * and current segments.  Apply protections to the
293 			 * non-overlapping portion of the previous segment, and
294 			 * then the overlap, which must use the union of the two
295 			 * segments' protections.
296 			 */
297 			link_elf_protect_range(ef, start, segstart, prot);
298 			link_elf_protect_range(ef, segstart, end,
299 			    prot | segprot);
300 			start = end;
301 			end = segend;
302 			prot = segprot;
303 		} else {
304 			/*
305 			 * Case 4: the two segments reside in the same page.
306 			 */
307 			prot |= segprot;
308 		}
309 	}
310 
311 	/*
312 	 * Fix up the last unprotected segment and trailing data.
313 	 */
314 	link_elf_protect_range(ef, start, end, prot);
315 	link_elf_protect_range(ef, end,
316 	    round_page((vm_offset_t)ef->address + ef->lf.size), gapprot);
317 }
318 
319 static int
320 link_elf_link_preload(linker_class_t cls, const char *filename,
321     linker_file_t *result)
322 {
323 	Elf_Ehdr *hdr;
324 	Elf_Shdr *shdr;
325 	Elf_Sym *es;
326 	void *modptr, *baseptr, *sizeptr;
327 	char *type;
328 	elf_file_t ef;
329 	linker_file_t lf;
330 	Elf_Addr off;
331 	int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex;
332 
333 	/* Look to see if we have the file preloaded */
334 	modptr = preload_search_by_name(filename);
335 	if (modptr == NULL)
336 		return ENOENT;
337 
338 	type = (char *)preload_search_info(modptr, MODINFO_TYPE);
339 	baseptr = preload_search_info(modptr, MODINFO_ADDR);
340 	sizeptr = preload_search_info(modptr, MODINFO_SIZE);
341 	hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA |
342 	    MODINFOMD_ELFHDR);
343 	shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA |
344 	    MODINFOMD_SHDR);
345 	if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE)
346 	    " obj module") != 0 &&
347 	    strcmp(type, "elf obj module") != 0)) {
348 		return (EFTYPE);
349 	}
350 	if (baseptr == NULL || sizeptr == NULL || hdr == NULL ||
351 	    shdr == NULL)
352 		return (EINVAL);
353 
354 	lf = linker_make_file(filename, &link_elf_class);
355 	if (lf == NULL)
356 		return (ENOMEM);
357 
358 	ef = (elf_file_t)lf;
359 	ef->preloaded = 1;
360 	ef->address = *(caddr_t *)baseptr;
361 	lf->address = *(caddr_t *)baseptr;
362 	lf->size = *(size_t *)sizeptr;
363 
364 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
365 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
366 	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
367 	    hdr->e_version != EV_CURRENT ||
368 	    hdr->e_type != ET_REL ||
369 	    hdr->e_machine != ELF_TARG_MACH) {
370 		error = EFTYPE;
371 		goto out;
372 	}
373 	ef->e_shdr = shdr;
374 
375 	/* Scan the section header for information and table sizing. */
376 	symtabindex = -1;
377 	symstrindex = -1;
378 	for (i = 0; i < hdr->e_shnum; i++) {
379 		switch (shdr[i].sh_type) {
380 		case SHT_PROGBITS:
381 		case SHT_NOBITS:
382 #ifdef __amd64__
383 		case SHT_X86_64_UNWIND:
384 #endif
385 			/* Ignore sections not loaded by the loader. */
386 			if (shdr[i].sh_addr == 0)
387 				break;
388 			ef->nprogtab++;
389 			break;
390 		case SHT_SYMTAB:
391 			symtabindex = i;
392 			symstrindex = shdr[i].sh_link;
393 			break;
394 		case SHT_REL:
395 			/*
396 			 * Ignore relocation tables for sections not
397 			 * loaded by the loader.
398 			 */
399 			if (shdr[shdr[i].sh_info].sh_addr == 0)
400 				break;
401 			ef->nreltab++;
402 			break;
403 		case SHT_RELA:
404 			if (shdr[shdr[i].sh_info].sh_addr == 0)
405 				break;
406 			ef->nrelatab++;
407 			break;
408 		}
409 	}
410 
411 	shstrindex = hdr->e_shstrndx;
412 	if (ef->nprogtab == 0 || symstrindex < 0 ||
413 	    symstrindex >= hdr->e_shnum ||
414 	    shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 ||
415 	    shstrindex >= hdr->e_shnum ||
416 	    shdr[shstrindex].sh_type != SHT_STRTAB) {
417 		printf("%s: bad/missing section headers\n", filename);
418 		error = ENOEXEC;
419 		goto out;
420 	}
421 
422 	/* Allocate space for tracking the load chunks */
423 	if (ef->nprogtab != 0)
424 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
425 		    M_LINKER, M_WAITOK | M_ZERO);
426 	if (ef->nreltab != 0)
427 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
428 		    M_LINKER, M_WAITOK | M_ZERO);
429 	if (ef->nrelatab != 0)
430 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
431 		    M_LINKER, M_WAITOK | M_ZERO);
432 	if ((ef->nprogtab != 0 && ef->progtab == NULL) ||
433 	    (ef->nreltab != 0 && ef->reltab == NULL) ||
434 	    (ef->nrelatab != 0 && ef->relatab == NULL)) {
435 		error = ENOMEM;
436 		goto out;
437 	}
438 
439 	/* XXX, relocate the sh_addr fields saved by the loader. */
440 	off = 0;
441 	for (i = 0; i < hdr->e_shnum; i++) {
442 		if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off))
443 			off = shdr[i].sh_addr;
444 	}
445 	for (i = 0; i < hdr->e_shnum; i++) {
446 		if (shdr[i].sh_addr != 0)
447 			shdr[i].sh_addr = shdr[i].sh_addr - off +
448 			    (Elf_Addr)ef->address;
449 	}
450 
451 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
452 	ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr;
453 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
454 	ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr;
455 	ef->shstrcnt = shdr[shstrindex].sh_size;
456 	ef->shstrtab = (char *)shdr[shstrindex].sh_addr;
457 
458 	/* Now fill out progtab and the relocation tables. */
459 	pb = 0;
460 	rl = 0;
461 	ra = 0;
462 	for (i = 0; i < hdr->e_shnum; i++) {
463 		switch (shdr[i].sh_type) {
464 		case SHT_PROGBITS:
465 		case SHT_NOBITS:
466 #ifdef __amd64__
467 		case SHT_X86_64_UNWIND:
468 #endif
469 			if (shdr[i].sh_addr == 0)
470 				break;
471 			ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
472 			if (shdr[i].sh_type == SHT_PROGBITS)
473 				ef->progtab[pb].name = "<<PROGBITS>>";
474 #ifdef __amd64__
475 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
476 				ef->progtab[pb].name = "<<UNWIND>>";
477 #endif
478 			else
479 				ef->progtab[pb].name = "<<NOBITS>>";
480 			ef->progtab[pb].size = shdr[i].sh_size;
481 			ef->progtab[pb].flags = shdr[i].sh_flags;
482 			ef->progtab[pb].sec = i;
483 			if (ef->shstrtab && shdr[i].sh_name != 0)
484 				ef->progtab[pb].name =
485 				    ef->shstrtab + shdr[i].sh_name;
486 			if (ef->progtab[pb].name != NULL &&
487 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
488 				void *dpcpu;
489 
490 				dpcpu = dpcpu_alloc(shdr[i].sh_size);
491 				if (dpcpu == NULL) {
492 					printf("%s: pcpu module space is out "
493 					    "of space; cannot allocate %#jx "
494 					    "for %s\n", __func__,
495 					    (uintmax_t)shdr[i].sh_size,
496 					    filename);
497 					error = ENOSPC;
498 					goto out;
499 				}
500 				memcpy(dpcpu, ef->progtab[pb].addr,
501 				    ef->progtab[pb].size);
502 				dpcpu_copy(dpcpu, shdr[i].sh_size);
503 				ef->progtab[pb].addr = dpcpu;
504 #ifdef VIMAGE
505 			} else if (ef->progtab[pb].name != NULL &&
506 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
507 				void *vnet_data;
508 
509 				vnet_data = vnet_data_alloc(shdr[i].sh_size);
510 				if (vnet_data == NULL) {
511 					printf("%s: vnet module space is out "
512 					    "of space; cannot allocate %#jx "
513 					    "for %s\n", __func__,
514 					    (uintmax_t)shdr[i].sh_size,
515 					    filename);
516 					error = ENOSPC;
517 					goto out;
518 				}
519 				memcpy(vnet_data, ef->progtab[pb].addr,
520 				    ef->progtab[pb].size);
521 				vnet_data_copy(vnet_data, shdr[i].sh_size);
522 				ef->progtab[pb].addr = vnet_data;
523 #endif
524 			} else if (ef->progtab[pb].name != NULL &&
525 			    !strcmp(ef->progtab[pb].name, ".ctors")) {
526 				lf->ctors_addr = ef->progtab[pb].addr;
527 				lf->ctors_size = shdr[i].sh_size;
528 			}
529 
530 			/* Update all symbol values with the offset. */
531 			for (j = 0; j < ef->ddbsymcnt; j++) {
532 				es = &ef->ddbsymtab[j];
533 				if (es->st_shndx != i)
534 					continue;
535 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
536 			}
537 			pb++;
538 			break;
539 		case SHT_REL:
540 			if (shdr[shdr[i].sh_info].sh_addr == 0)
541 				break;
542 			ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr;
543 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
544 			ef->reltab[rl].sec = shdr[i].sh_info;
545 			rl++;
546 			break;
547 		case SHT_RELA:
548 			if (shdr[shdr[i].sh_info].sh_addr == 0)
549 				break;
550 			ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr;
551 			ef->relatab[ra].nrela =
552 			    shdr[i].sh_size / sizeof(Elf_Rela);
553 			ef->relatab[ra].sec = shdr[i].sh_info;
554 			ra++;
555 			break;
556 		}
557 	}
558 	if (pb != ef->nprogtab) {
559 		printf("%s: lost progbits\n", filename);
560 		error = ENOEXEC;
561 		goto out;
562 	}
563 	if (rl != ef->nreltab) {
564 		printf("%s: lost reltab\n", filename);
565 		error = ENOEXEC;
566 		goto out;
567 	}
568 	if (ra != ef->nrelatab) {
569 		printf("%s: lost relatab\n", filename);
570 		error = ENOEXEC;
571 		goto out;
572 	}
573 
574 	/*
575 	 * The file needs to be writeable and executable while applying
576 	 * relocations.  Mapping protections are applied once relocation
577 	 * processing is complete.
578 	 */
579 	link_elf_protect_range(ef, (vm_offset_t)ef->address,
580 	    round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL);
581 
582 	/* Local intra-module relocations */
583 	error = link_elf_reloc_local(lf, false);
584 	if (error != 0)
585 		goto out;
586 	*result = lf;
587 	return (0);
588 
589 out:
590 	/* preload not done this way */
591 	linker_file_unload(lf, LINKER_UNLOAD_FORCE);
592 	return (error);
593 }
594 
/*
 * Call every non-NULL function pointer stored in the table of 'size' bytes
 * at 'addr', in ascending address order.  Nothing happens when the table is
 * absent or empty; trailing bytes that do not make up a whole pointer are
 * ignored.
 */
static void
link_elf_invoke_ctors(caddr_t addr, size_t size)
{
	void (**fn)(void);
	void (**limit)(void);

	if (addr == NULL || size == 0)
		return;

	fn = (void *)addr;
	limit = fn + size / sizeof(*fn);
	for (; fn < limit; fn++) {
		if (*fn == NULL)
			continue;
		(**fn)();
	}
}
610 
611 static int
612 link_elf_link_preload_finish(linker_file_t lf)
613 {
614 	elf_file_t ef;
615 	int error;
616 
617 	ef = (elf_file_t)lf;
618 	error = relocate_file(ef);
619 	if (error)
620 		return (error);
621 
622 	/* Notify MD code that a module is being loaded. */
623 	error = elf_cpu_load_file(lf);
624 	if (error)
625 		return (error);
626 
627 #if defined(__i386__) || defined(__amd64__)
628 	/* Now ifuncs. */
629 	error = link_elf_reloc_local(lf, true);
630 	if (error != 0)
631 		return (error);
632 #endif
633 
634 	/* Apply protections now that relocation processing is complete. */
635 	link_elf_protect(ef);
636 
637 	link_elf_invoke_ctors(lf->ctors_addr, lf->ctors_size);
638 	return (0);
639 }
640 
641 static int
642 link_elf_load_file(linker_class_t cls, const char *filename,
643     linker_file_t *result)
644 {
645 	struct nameidata *nd;
646 	struct thread *td = curthread;	/* XXX */
647 	Elf_Ehdr *hdr;
648 	Elf_Shdr *shdr;
649 	Elf_Sym *es;
650 	int nbytes, i, j;
651 	vm_offset_t mapbase;
652 	size_t mapsize;
653 	int error = 0;
654 	ssize_t resid;
655 	int flags;
656 	elf_file_t ef;
657 	linker_file_t lf;
658 	int symtabindex;
659 	int symstrindex;
660 	int shstrindex;
661 	int nsym;
662 	int pb, rl, ra;
663 	int alignmask;
664 
665 	shdr = NULL;
666 	lf = NULL;
667 	mapsize = 0;
668 	hdr = NULL;
669 
670 	nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK);
671 	NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
672 	flags = FREAD;
673 	error = vn_open(nd, &flags, 0, NULL);
674 	if (error) {
675 		free(nd, M_TEMP);
676 		return error;
677 	}
678 	NDFREE(nd, NDF_ONLY_PNBUF);
679 	if (nd->ni_vp->v_type != VREG) {
680 		error = ENOEXEC;
681 		goto out;
682 	}
683 #ifdef MAC
684 	error = mac_kld_check_load(td->td_ucred, nd->ni_vp);
685 	if (error) {
686 		goto out;
687 	}
688 #endif
689 
690 	/* Read the elf header from the file. */
691 	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
692 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0,
693 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
694 	    &resid, td);
695 	if (error)
696 		goto out;
697 	if (resid != 0){
698 		error = ENOEXEC;
699 		goto out;
700 	}
701 
702 	if (!IS_ELF(*hdr)) {
703 		error = ENOEXEC;
704 		goto out;
705 	}
706 
707 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
708 	    || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
709 		link_elf_error(filename, "Unsupported file layout");
710 		error = ENOEXEC;
711 		goto out;
712 	}
713 	if (hdr->e_ident[EI_VERSION] != EV_CURRENT
714 	    || hdr->e_version != EV_CURRENT) {
715 		link_elf_error(filename, "Unsupported file version");
716 		error = ENOEXEC;
717 		goto out;
718 	}
719 	if (hdr->e_type != ET_REL) {
720 		error = ENOSYS;
721 		goto out;
722 	}
723 	if (hdr->e_machine != ELF_TARG_MACH) {
724 		link_elf_error(filename, "Unsupported machine");
725 		error = ENOEXEC;
726 		goto out;
727 	}
728 
729 	lf = linker_make_file(filename, &link_elf_class);
730 	if (!lf) {
731 		error = ENOMEM;
732 		goto out;
733 	}
734 	ef = (elf_file_t) lf;
735 	ef->nprogtab = 0;
736 	ef->e_shdr = 0;
737 	ef->nreltab = 0;
738 	ef->nrelatab = 0;
739 
740 	/* Allocate and read in the section header */
741 	nbytes = hdr->e_shnum * hdr->e_shentsize;
742 	if (nbytes == 0 || hdr->e_shoff == 0 ||
743 	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
744 		error = ENOEXEC;
745 		goto out;
746 	}
747 	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
748 	ef->e_shdr = shdr;
749 	error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes,
750 	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
751 	    NOCRED, &resid, td);
752 	if (error)
753 		goto out;
754 	if (resid) {
755 		error = ENOEXEC;
756 		goto out;
757 	}
758 
759 	/* Scan the section header for information and table sizing. */
760 	nsym = 0;
761 	symtabindex = -1;
762 	symstrindex = -1;
763 	for (i = 0; i < hdr->e_shnum; i++) {
764 		if (shdr[i].sh_size == 0)
765 			continue;
766 		switch (shdr[i].sh_type) {
767 		case SHT_PROGBITS:
768 		case SHT_NOBITS:
769 #ifdef __amd64__
770 		case SHT_X86_64_UNWIND:
771 #endif
772 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
773 				break;
774 			ef->nprogtab++;
775 			break;
776 		case SHT_SYMTAB:
777 			nsym++;
778 			symtabindex = i;
779 			symstrindex = shdr[i].sh_link;
780 			break;
781 		case SHT_REL:
782 			/*
783 			 * Ignore relocation tables for unallocated
784 			 * sections.
785 			 */
786 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
787 				break;
788 			ef->nreltab++;
789 			break;
790 		case SHT_RELA:
791 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
792 				break;
793 			ef->nrelatab++;
794 			break;
795 		case SHT_STRTAB:
796 			break;
797 		}
798 	}
799 	if (ef->nprogtab == 0) {
800 		link_elf_error(filename, "file has no contents");
801 		error = ENOEXEC;
802 		goto out;
803 	}
804 	if (nsym != 1) {
805 		/* Only allow one symbol table for now */
806 		link_elf_error(filename,
807 		    "file must have exactly one symbol table");
808 		error = ENOEXEC;
809 		goto out;
810 	}
811 	if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
812 	    shdr[symstrindex].sh_type != SHT_STRTAB) {
813 		link_elf_error(filename, "file has invalid symbol strings");
814 		error = ENOEXEC;
815 		goto out;
816 	}
817 
818 	/* Allocate space for tracking the load chunks */
819 	if (ef->nprogtab != 0)
820 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
821 		    M_LINKER, M_WAITOK | M_ZERO);
822 	if (ef->nreltab != 0)
823 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
824 		    M_LINKER, M_WAITOK | M_ZERO);
825 	if (ef->nrelatab != 0)
826 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
827 		    M_LINKER, M_WAITOK | M_ZERO);
828 
829 	if (symtabindex == -1) {
830 		link_elf_error(filename, "lost symbol table index");
831 		error = ENOEXEC;
832 		goto out;
833 	}
834 	/* Allocate space for and load the symbol table */
835 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
836 	ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
837 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab,
838 	    shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
839 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
840 	    &resid, td);
841 	if (error)
842 		goto out;
843 	if (resid != 0){
844 		error = EINVAL;
845 		goto out;
846 	}
847 
848 	/* Allocate space for and load the symbol strings */
849 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
850 	ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
851 	error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab,
852 	    shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
853 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
854 	    &resid, td);
855 	if (error)
856 		goto out;
857 	if (resid != 0){
858 		error = EINVAL;
859 		goto out;
860 	}
861 
862 	/* Do we have a string table for the section names?  */
863 	shstrindex = -1;
864 	if (hdr->e_shstrndx != 0 &&
865 	    shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
866 		shstrindex = hdr->e_shstrndx;
867 		ef->shstrcnt = shdr[shstrindex].sh_size;
868 		ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
869 		    M_WAITOK);
870 		error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab,
871 		    shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
872 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
873 		    &resid, td);
874 		if (error)
875 			goto out;
876 		if (resid != 0){
877 			error = EINVAL;
878 			goto out;
879 		}
880 	}
881 
882 	/* Size up code/data(progbits) and bss(nobits). */
883 	alignmask = 0;
884 	for (i = 0; i < hdr->e_shnum; i++) {
885 		if (shdr[i].sh_size == 0)
886 			continue;
887 		switch (shdr[i].sh_type) {
888 		case SHT_PROGBITS:
889 		case SHT_NOBITS:
890 #ifdef __amd64__
891 		case SHT_X86_64_UNWIND:
892 #endif
893 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
894 				break;
895 			alignmask = shdr[i].sh_addralign - 1;
896 			mapsize += alignmask;
897 			mapsize &= ~alignmask;
898 			mapsize += shdr[i].sh_size;
899 			break;
900 		}
901 	}
902 
903 	/*
904 	 * We know how much space we need for the text/data/bss/etc.
905 	 * This stuff needs to be in a single chunk so that profiling etc
906 	 * can get the bounds and gdb can associate offsets with modules
907 	 */
908 	ef->object = vm_object_allocate(OBJT_PHYS, atop(round_page(mapsize)));
909 	if (ef->object == NULL) {
910 		error = ENOMEM;
911 		goto out;
912 	}
913 
914 	/*
915 	 * In order to satisfy amd64's architectural requirements on the
916 	 * location of code and data in the kernel's address space, request a
917 	 * mapping that is above the kernel.
918 	 *
919 	 * Protections will be restricted once relocations are applied.
920 	 */
921 #ifdef __amd64__
922 	mapbase = KERNBASE;
923 #else
924 	mapbase = VM_MIN_KERNEL_ADDRESS;
925 #endif
926 	error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
927 	    round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
928 	    VM_PROT_ALL, 0);
929 	if (error != KERN_SUCCESS) {
930 		vm_object_deallocate(ef->object);
931 		ef->object = NULL;
932 		error = ENOMEM;
933 		goto out;
934 	}
935 
936 	/* Wire the pages */
937 	error = vm_map_wire(kernel_map, mapbase,
938 	    mapbase + round_page(mapsize),
939 	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
940 	if (error != KERN_SUCCESS) {
941 		error = ENOMEM;
942 		goto out;
943 	}
944 
945 	/* Inform the kld system about the situation */
946 	lf->address = ef->address = (caddr_t)mapbase;
947 	lf->size = mapsize;
948 
949 	/*
950 	 * Now load code/data(progbits), zero bss(nobits), allocate space for
951 	 * and load relocs
952 	 */
953 	pb = 0;
954 	rl = 0;
955 	ra = 0;
956 	alignmask = 0;
957 	for (i = 0; i < hdr->e_shnum; i++) {
958 		if (shdr[i].sh_size == 0)
959 			continue;
960 		switch (shdr[i].sh_type) {
961 		case SHT_PROGBITS:
962 		case SHT_NOBITS:
963 #ifdef __amd64__
964 		case SHT_X86_64_UNWIND:
965 #endif
966 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
967 				break;
968 			alignmask = shdr[i].sh_addralign - 1;
969 			mapbase += alignmask;
970 			mapbase &= ~alignmask;
971 			if (ef->shstrtab != NULL && shdr[i].sh_name != 0) {
972 				ef->progtab[pb].name =
973 				    ef->shstrtab + shdr[i].sh_name;
974 				if (!strcmp(ef->progtab[pb].name, ".ctors")) {
975 					lf->ctors_addr = (caddr_t)mapbase;
976 					lf->ctors_size = shdr[i].sh_size;
977 				}
978 			} else if (shdr[i].sh_type == SHT_PROGBITS)
979 				ef->progtab[pb].name = "<<PROGBITS>>";
980 #ifdef __amd64__
981 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
982 				ef->progtab[pb].name = "<<UNWIND>>";
983 #endif
984 			else
985 				ef->progtab[pb].name = "<<NOBITS>>";
986 			if (ef->progtab[pb].name != NULL &&
987 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
988 				ef->progtab[pb].addr =
989 				    dpcpu_alloc(shdr[i].sh_size);
990 				if (ef->progtab[pb].addr == NULL) {
991 					printf("%s: pcpu module space is out "
992 					    "of space; cannot allocate %#jx "
993 					    "for %s\n", __func__,
994 					    (uintmax_t)shdr[i].sh_size,
995 					    filename);
996 				}
997 			}
998 #ifdef VIMAGE
999 			else if (ef->progtab[pb].name != NULL &&
1000 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
1001 				ef->progtab[pb].addr =
1002 				    vnet_data_alloc(shdr[i].sh_size);
1003 				if (ef->progtab[pb].addr == NULL) {
1004 					printf("%s: vnet module space is out "
1005 					    "of space; cannot allocate %#jx "
1006 					    "for %s\n", __func__,
1007 					    (uintmax_t)shdr[i].sh_size,
1008 					    filename);
1009 				}
1010 			}
1011 #endif
1012 			else
1013 				ef->progtab[pb].addr =
1014 				    (void *)(uintptr_t)mapbase;
1015 			if (ef->progtab[pb].addr == NULL) {
1016 				error = ENOSPC;
1017 				goto out;
1018 			}
1019 			ef->progtab[pb].size = shdr[i].sh_size;
1020 			ef->progtab[pb].flags = shdr[i].sh_flags;
1021 			ef->progtab[pb].sec = i;
1022 			if (shdr[i].sh_type == SHT_PROGBITS
1023 #ifdef __amd64__
1024 			    || shdr[i].sh_type == SHT_X86_64_UNWIND
1025 #endif
1026 			    ) {
1027 				error = vn_rdwr(UIO_READ, nd->ni_vp,
1028 				    ef->progtab[pb].addr,
1029 				    shdr[i].sh_size, shdr[i].sh_offset,
1030 				    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
1031 				    NOCRED, &resid, td);
1032 				if (error)
1033 					goto out;
1034 				if (resid != 0){
1035 					error = EINVAL;
1036 					goto out;
1037 				}
1038 				/* Initialize the per-cpu or vnet area. */
1039 				if (ef->progtab[pb].addr != (void *)mapbase &&
1040 				    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
1041 					dpcpu_copy(ef->progtab[pb].addr,
1042 					    shdr[i].sh_size);
1043 #ifdef VIMAGE
1044 				else if (ef->progtab[pb].addr !=
1045 				    (void *)mapbase &&
1046 				    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
1047 					vnet_data_copy(ef->progtab[pb].addr,
1048 					    shdr[i].sh_size);
1049 #endif
1050 			} else
1051 				bzero(ef->progtab[pb].addr, shdr[i].sh_size);
1052 
1053 			/* Update all symbol values with the offset. */
1054 			for (j = 0; j < ef->ddbsymcnt; j++) {
1055 				es = &ef->ddbsymtab[j];
1056 				if (es->st_shndx != i)
1057 					continue;
1058 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
1059 			}
1060 			mapbase += shdr[i].sh_size;
1061 			pb++;
1062 			break;
1063 		case SHT_REL:
1064 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1065 				break;
1066 			ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
1067 			    M_WAITOK);
1068 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
1069 			ef->reltab[rl].sec = shdr[i].sh_info;
1070 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1071 			    (void *)ef->reltab[rl].rel,
1072 			    shdr[i].sh_size, shdr[i].sh_offset,
1073 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1074 			    &resid, td);
1075 			if (error)
1076 				goto out;
1077 			if (resid != 0){
1078 				error = EINVAL;
1079 				goto out;
1080 			}
1081 			rl++;
1082 			break;
1083 		case SHT_RELA:
1084 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1085 				break;
1086 			ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
1087 			    M_WAITOK);
1088 			ef->relatab[ra].nrela =
1089 			    shdr[i].sh_size / sizeof(Elf_Rela);
1090 			ef->relatab[ra].sec = shdr[i].sh_info;
1091 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1092 			    (void *)ef->relatab[ra].rela,
1093 			    shdr[i].sh_size, shdr[i].sh_offset,
1094 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1095 			    &resid, td);
1096 			if (error)
1097 				goto out;
1098 			if (resid != 0){
1099 				error = EINVAL;
1100 				goto out;
1101 			}
1102 			ra++;
1103 			break;
1104 		}
1105 	}
1106 	if (pb != ef->nprogtab) {
1107 		link_elf_error(filename, "lost progbits");
1108 		error = ENOEXEC;
1109 		goto out;
1110 	}
1111 	if (rl != ef->nreltab) {
1112 		link_elf_error(filename, "lost reltab");
1113 		error = ENOEXEC;
1114 		goto out;
1115 	}
1116 	if (ra != ef->nrelatab) {
1117 		link_elf_error(filename, "lost relatab");
1118 		error = ENOEXEC;
1119 		goto out;
1120 	}
1121 	if (mapbase != (vm_offset_t)ef->address + mapsize) {
1122 		printf(
1123 		    "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
1124 		    filename != NULL ? filename : "<none>",
1125 		    (u_long)mapbase, ef->address, (u_long)mapsize,
1126 		    (u_long)(vm_offset_t)ef->address + mapsize);
1127 		error = ENOMEM;
1128 		goto out;
1129 	}
1130 
1131 	/* Local intra-module relocations */
1132 	error = link_elf_reloc_local(lf, false);
1133 	if (error != 0)
1134 		goto out;
1135 
1136 	/* Pull in dependencies */
1137 	VOP_UNLOCK(nd->ni_vp);
1138 	error = linker_load_dependencies(lf);
1139 	vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY);
1140 	if (error)
1141 		goto out;
1142 
1143 	/* External relocations */
1144 	error = relocate_file(ef);
1145 	if (error)
1146 		goto out;
1147 
1148 	/* Notify MD code that a module is being loaded. */
1149 	error = elf_cpu_load_file(lf);
1150 	if (error)
1151 		goto out;
1152 
1153 #if defined(__i386__) || defined(__amd64__)
1154 	/* Now ifuncs. */
1155 	error = link_elf_reloc_local(lf, true);
1156 	if (error != 0)
1157 		goto out;
1158 #endif
1159 
1160 	link_elf_protect(ef);
1161 	link_elf_invoke_ctors(lf->ctors_addr, lf->ctors_size);
1162 	*result = lf;
1163 
1164 out:
1165 	VOP_UNLOCK(nd->ni_vp);
1166 	vn_close(nd->ni_vp, FREAD, td->td_ucred, td);
1167 	free(nd, M_TEMP);
1168 	if (error && lf)
1169 		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
1170 	free(hdr, M_LINKER);
1171 
1172 	return error;
1173 }
1174 
1175 static void
1176 link_elf_unload_file(linker_file_t file)
1177 {
1178 	elf_file_t ef = (elf_file_t) file;
1179 	u_int i;
1180 
1181 	/* Notify MD code that a module is being unloaded. */
1182 	elf_cpu_unload_file(file);
1183 
1184 	if (ef->progtab) {
1185 		for (i = 0; i < ef->nprogtab; i++) {
1186 			if (ef->progtab[i].size == 0)
1187 				continue;
1188 			if (ef->progtab[i].name == NULL)
1189 				continue;
1190 			if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME))
1191 				dpcpu_free(ef->progtab[i].addr,
1192 				    ef->progtab[i].size);
1193 #ifdef VIMAGE
1194 			else if (!strcmp(ef->progtab[i].name, VNET_SETNAME))
1195 				vnet_data_free(ef->progtab[i].addr,
1196 				    ef->progtab[i].size);
1197 #endif
1198 		}
1199 	}
1200 	if (ef->preloaded) {
1201 		free(ef->reltab, M_LINKER);
1202 		free(ef->relatab, M_LINKER);
1203 		free(ef->progtab, M_LINKER);
1204 		free(ef->ctftab, M_LINKER);
1205 		free(ef->ctfoff, M_LINKER);
1206 		free(ef->typoff, M_LINKER);
1207 		if (file->pathname != NULL)
1208 			preload_delete_name(file->pathname);
1209 		return;
1210 	}
1211 
1212 	for (i = 0; i < ef->nreltab; i++)
1213 		free(ef->reltab[i].rel, M_LINKER);
1214 	for (i = 0; i < ef->nrelatab; i++)
1215 		free(ef->relatab[i].rela, M_LINKER);
1216 	free(ef->reltab, M_LINKER);
1217 	free(ef->relatab, M_LINKER);
1218 	free(ef->progtab, M_LINKER);
1219 
1220 	if (ef->object != NULL)
1221 		vm_map_remove(kernel_map, (vm_offset_t)ef->address,
1222 		    (vm_offset_t)ef->address + ptoa(ef->object->size));
1223 	free(ef->e_shdr, M_LINKER);
1224 	free(ef->ddbsymtab, M_LINKER);
1225 	free(ef->ddbstrtab, M_LINKER);
1226 	free(ef->shstrtab, M_LINKER);
1227 	free(ef->ctftab, M_LINKER);
1228 	free(ef->ctfoff, M_LINKER);
1229 	free(ef->typoff, M_LINKER);
1230 }
1231 
1232 static const char *
1233 symbol_name(elf_file_t ef, Elf_Size r_info)
1234 {
1235 	const Elf_Sym *ref;
1236 
1237 	if (ELF_R_SYM(r_info)) {
1238 		ref = ef->ddbsymtab + ELF_R_SYM(r_info);
1239 		return ef->ddbstrtab + ref->st_name;
1240 	} else
1241 		return NULL;
1242 }
1243 
1244 static Elf_Addr
1245 findbase(elf_file_t ef, int sec)
1246 {
1247 	int i;
1248 	Elf_Addr base = 0;
1249 
1250 	for (i = 0; i < ef->nprogtab; i++) {
1251 		if (sec == ef->progtab[i].sec) {
1252 			base = (Elf_Addr)ef->progtab[i].addr;
1253 			break;
1254 		}
1255 	}
1256 	return base;
1257 }
1258 
1259 static int
1260 relocate_file(elf_file_t ef)
1261 {
1262 	const Elf_Rel *rellim;
1263 	const Elf_Rel *rel;
1264 	const Elf_Rela *relalim;
1265 	const Elf_Rela *rela;
1266 	const char *symname;
1267 	const Elf_Sym *sym;
1268 	int i;
1269 	Elf_Size symidx;
1270 	Elf_Addr base;
1271 
1272 	/* Perform relocations without addend if there are any: */
1273 	for (i = 0; i < ef->nreltab; i++) {
1274 		rel = ef->reltab[i].rel;
1275 		if (rel == NULL) {
1276 			link_elf_error(ef->lf.filename, "lost a reltab!");
1277 			return (ENOEXEC);
1278 		}
1279 		rellim = rel + ef->reltab[i].nrel;
1280 		base = findbase(ef, ef->reltab[i].sec);
1281 		if (base == 0) {
1282 			link_elf_error(ef->lf.filename, "lost base for reltab");
1283 			return (ENOEXEC);
1284 		}
1285 		for ( ; rel < rellim; rel++) {
1286 			symidx = ELF_R_SYM(rel->r_info);
1287 			if (symidx >= ef->ddbsymcnt)
1288 				continue;
1289 			sym = ef->ddbsymtab + symidx;
1290 			/* Local relocs are already done */
1291 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1292 				continue;
1293 			if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL,
1294 			    elf_obj_lookup)) {
1295 				symname = symbol_name(ef, rel->r_info);
1296 				printf("link_elf_obj: symbol %s undefined\n",
1297 				    symname);
1298 				return (ENOENT);
1299 			}
1300 		}
1301 	}
1302 
1303 	/* Perform relocations with addend if there are any: */
1304 	for (i = 0; i < ef->nrelatab; i++) {
1305 		rela = ef->relatab[i].rela;
1306 		if (rela == NULL) {
1307 			link_elf_error(ef->lf.filename, "lost a relatab!");
1308 			return (ENOEXEC);
1309 		}
1310 		relalim = rela + ef->relatab[i].nrela;
1311 		base = findbase(ef, ef->relatab[i].sec);
1312 		if (base == 0) {
1313 			link_elf_error(ef->lf.filename,
1314 			    "lost base for relatab");
1315 			return (ENOEXEC);
1316 		}
1317 		for ( ; rela < relalim; rela++) {
1318 			symidx = ELF_R_SYM(rela->r_info);
1319 			if (symidx >= ef->ddbsymcnt)
1320 				continue;
1321 			sym = ef->ddbsymtab + symidx;
1322 			/* Local relocs are already done */
1323 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1324 				continue;
1325 			if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA,
1326 			    elf_obj_lookup)) {
1327 				symname = symbol_name(ef, rela->r_info);
1328 				printf("link_elf_obj: symbol %s undefined\n",
1329 				    symname);
1330 				return (ENOENT);
1331 			}
1332 		}
1333 	}
1334 
1335 	/*
1336 	 * Only clean SHN_FBSD_CACHED for successful return.  If we
1337 	 * modified symbol table for the object but found an
1338 	 * unresolved symbol, there is no reason to roll back.
1339 	 */
1340 	elf_obj_cleanup_globals_cache(ef);
1341 
1342 	return (0);
1343 }
1344 
1345 static int
1346 link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym)
1347 {
1348 	elf_file_t ef = (elf_file_t) lf;
1349 	const Elf_Sym *symp;
1350 	const char *strp;
1351 	int i;
1352 
1353 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1354 		strp = ef->ddbstrtab + symp->st_name;
1355 		if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) {
1356 			*sym = (c_linker_sym_t) symp;
1357 			return 0;
1358 		}
1359 	}
1360 	return ENOENT;
1361 }
1362 
1363 static int
1364 link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1365     linker_symval_t *symval)
1366 {
1367 	elf_file_t ef;
1368 	const Elf_Sym *es;
1369 	caddr_t val;
1370 
1371 	ef = (elf_file_t) lf;
1372 	es = (const Elf_Sym*) sym;
1373 	val = (caddr_t)es->st_value;
1374 	if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) {
1375 		symval->name = ef->ddbstrtab + es->st_name;
1376 		val = (caddr_t)es->st_value;
1377 		if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC)
1378 			val = ((caddr_t (*)(void))val)();
1379 		symval->value = val;
1380 		symval->size = es->st_size;
1381 		return 0;
1382 	}
1383 	return ENOENT;
1384 }
1385 
1386 static int
1387 link_elf_search_symbol(linker_file_t lf, caddr_t value,
1388     c_linker_sym_t *sym, long *diffp)
1389 {
1390 	elf_file_t ef = (elf_file_t) lf;
1391 	u_long off = (uintptr_t) (void *) value;
1392 	u_long diff = off;
1393 	u_long st_value;
1394 	const Elf_Sym *es;
1395 	const Elf_Sym *best = NULL;
1396 	int i;
1397 
1398 	for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) {
1399 		if (es->st_name == 0)
1400 			continue;
1401 		st_value = es->st_value;
1402 		if (off >= st_value) {
1403 			if (off - st_value < diff) {
1404 				diff = off - st_value;
1405 				best = es;
1406 				if (diff == 0)
1407 					break;
1408 			} else if (off - st_value == diff) {
1409 				best = es;
1410 			}
1411 		}
1412 	}
1413 	if (best == NULL)
1414 		*diffp = off;
1415 	else
1416 		*diffp = diff;
1417 	*sym = (c_linker_sym_t) best;
1418 
1419 	return 0;
1420 }
1421 
1422 /*
1423  * Look up a linker set on an ELF system.
1424  */
1425 static int
1426 link_elf_lookup_set(linker_file_t lf, const char *name,
1427     void ***startp, void ***stopp, int *countp)
1428 {
1429 	elf_file_t ef = (elf_file_t)lf;
1430 	void **start, **stop;
1431 	int i, count;
1432 
1433 	/* Relative to section number */
1434 	for (i = 0; i < ef->nprogtab; i++) {
1435 		if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) &&
1436 		    strcmp(ef->progtab[i].name + 4, name) == 0) {
1437 			start  = (void **)ef->progtab[i].addr;
1438 			stop = (void **)((char *)ef->progtab[i].addr +
1439 			    ef->progtab[i].size);
1440 			count = stop - start;
1441 			if (startp)
1442 				*startp = start;
1443 			if (stopp)
1444 				*stopp = stop;
1445 			if (countp)
1446 				*countp = count;
1447 			return (0);
1448 		}
1449 	}
1450 	return (ESRCH);
1451 }
1452 
1453 static int
1454 link_elf_each_function_name(linker_file_t file,
1455     int (*callback)(const char *, void *), void *opaque)
1456 {
1457 	elf_file_t ef = (elf_file_t)file;
1458 	const Elf_Sym *symp;
1459 	int i, error;
1460 
1461 	/* Exhaustive search */
1462 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1463 		if (symp->st_value != 0 &&
1464 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1465 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1466 			error = callback(ef->ddbstrtab + symp->st_name, opaque);
1467 			if (error)
1468 				return (error);
1469 		}
1470 	}
1471 	return (0);
1472 }
1473 
1474 static int
1475 link_elf_each_function_nameval(linker_file_t file,
1476     linker_function_nameval_callback_t callback, void *opaque)
1477 {
1478 	linker_symval_t symval;
1479 	elf_file_t ef = (elf_file_t)file;
1480 	const Elf_Sym* symp;
1481 	int i, error;
1482 
1483 	/* Exhaustive search */
1484 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1485 		if (symp->st_value != 0 &&
1486 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1487 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1488 			error = link_elf_symbol_values(file,
1489 			    (c_linker_sym_t)symp, &symval);
1490 			if (error)
1491 				return (error);
1492 			error = callback(file, i, &symval, opaque);
1493 			if (error)
1494 				return (error);
1495 		}
1496 	}
1497 	return (0);
1498 }
1499 
1500 static void
1501 elf_obj_cleanup_globals_cache(elf_file_t ef)
1502 {
1503 	Elf_Sym *sym;
1504 	Elf_Size i;
1505 
1506 	for (i = 0; i < ef->ddbsymcnt; i++) {
1507 		sym = ef->ddbsymtab + i;
1508 		if (sym->st_shndx == SHN_FBSD_CACHED) {
1509 			sym->st_shndx = SHN_UNDEF;
1510 			sym->st_value = 0;
1511 		}
1512 	}
1513 }
1514 
1515 /*
1516  * Symbol lookup function that can be used when the symbol index is known (ie
1517  * in relocations). It uses the symbol index instead of doing a fully fledged
1518  * hash table based lookup when such is valid. For example for local symbols.
1519  * This is not only more efficient, it's also more correct. It's not always
1520  * the case that the symbol can be found through the hash table.
1521  */
1522 static int
1523 elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
1524 {
1525 	elf_file_t ef = (elf_file_t)lf;
1526 	Elf_Sym *sym;
1527 	const char *symbol;
1528 	Elf_Addr res1;
1529 
1530 	/* Don't even try to lookup the symbol if the index is bogus. */
1531 	if (symidx >= ef->ddbsymcnt) {
1532 		*res = 0;
1533 		return (EINVAL);
1534 	}
1535 
1536 	sym = ef->ddbsymtab + symidx;
1537 
1538 	/* Quick answer if there is a definition included. */
1539 	if (sym->st_shndx != SHN_UNDEF) {
1540 		res1 = (Elf_Addr)sym->st_value;
1541 		if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC)
1542 			res1 = ((Elf_Addr (*)(void))res1)();
1543 		*res = res1;
1544 		return (0);
1545 	}
1546 
1547 	/* If we get here, then it is undefined and needs a lookup. */
1548 	switch (ELF_ST_BIND(sym->st_info)) {
1549 	case STB_LOCAL:
1550 		/* Local, but undefined? huh? */
1551 		*res = 0;
1552 		return (EINVAL);
1553 
1554 	case STB_GLOBAL:
1555 	case STB_WEAK:
1556 		/* Relative to Data or Function name */
1557 		symbol = ef->ddbstrtab + sym->st_name;
1558 
1559 		/* Force a lookup failure if the symbol name is bogus. */
1560 		if (*symbol == 0) {
1561 			*res = 0;
1562 			return (EINVAL);
1563 		}
1564 		res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
1565 
1566 		/*
1567 		 * Cache global lookups during module relocation. The failure
1568 		 * case is particularly expensive for callers, who must scan
1569 		 * through the entire globals table doing strcmp(). Cache to
1570 		 * avoid doing such work repeatedly.
1571 		 *
1572 		 * After relocation is complete, undefined globals will be
1573 		 * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
1574 		 * above.
1575 		 */
1576 		if (res1 != 0) {
1577 			sym->st_shndx = SHN_FBSD_CACHED;
1578 			sym->st_value = res1;
1579 			*res = res1;
1580 			return (0);
1581 		} else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1582 			sym->st_value = 0;
1583 			*res = 0;
1584 			return (0);
1585 		}
1586 		return (EINVAL);
1587 
1588 	default:
1589 		return (EINVAL);
1590 	}
1591 }
1592 
1593 static void
1594 link_elf_fix_link_set(elf_file_t ef)
1595 {
1596 	static const char startn[] = "__start_";
1597 	static const char stopn[] = "__stop_";
1598 	Elf_Sym *sym;
1599 	const char *sym_name, *linkset_name;
1600 	Elf_Addr startp, stopp;
1601 	Elf_Size symidx;
1602 	int start, i;
1603 
1604 	startp = stopp = 0;
1605 	for (symidx = 1 /* zero entry is special */;
1606 		symidx < ef->ddbsymcnt; symidx++) {
1607 		sym = ef->ddbsymtab + symidx;
1608 		if (sym->st_shndx != SHN_UNDEF)
1609 			continue;
1610 
1611 		sym_name = ef->ddbstrtab + sym->st_name;
1612 		if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) {
1613 			start = 1;
1614 			linkset_name = sym_name + sizeof(startn) - 1;
1615 		}
1616 		else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) {
1617 			start = 0;
1618 			linkset_name = sym_name + sizeof(stopn) - 1;
1619 		}
1620 		else
1621 			continue;
1622 
1623 		for (i = 0; i < ef->nprogtab; i++) {
1624 			if (strcmp(ef->progtab[i].name, linkset_name) == 0) {
1625 				startp = (Elf_Addr)ef->progtab[i].addr;
1626 				stopp = (Elf_Addr)(startp + ef->progtab[i].size);
1627 				break;
1628 			}
1629 		}
1630 		if (i == ef->nprogtab)
1631 			continue;
1632 
1633 		sym->st_value = start ? startp : stopp;
1634 		sym->st_shndx = i;
1635 	}
1636 }
1637 
1638 static int
1639 link_elf_reloc_local(linker_file_t lf, bool ifuncs)
1640 {
1641 	elf_file_t ef = (elf_file_t)lf;
1642 	const Elf_Rel *rellim;
1643 	const Elf_Rel *rel;
1644 	const Elf_Rela *relalim;
1645 	const Elf_Rela *rela;
1646 	const Elf_Sym *sym;
1647 	Elf_Addr base;
1648 	int i;
1649 	Elf_Size symidx;
1650 
1651 	link_elf_fix_link_set(ef);
1652 
1653 	/* Perform relocations without addend if there are any: */
1654 	for (i = 0; i < ef->nreltab; i++) {
1655 		rel = ef->reltab[i].rel;
1656 		if (rel == NULL) {
1657 			link_elf_error(ef->lf.filename, "lost a reltab");
1658 			return (ENOEXEC);
1659 		}
1660 		rellim = rel + ef->reltab[i].nrel;
1661 		base = findbase(ef, ef->reltab[i].sec);
1662 		if (base == 0) {
1663 			link_elf_error(ef->lf.filename, "lost base for reltab");
1664 			return (ENOEXEC);
1665 		}
1666 		for ( ; rel < rellim; rel++) {
1667 			symidx = ELF_R_SYM(rel->r_info);
1668 			if (symidx >= ef->ddbsymcnt)
1669 				continue;
1670 			sym = ef->ddbsymtab + symidx;
1671 			/* Only do local relocs */
1672 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1673 				continue;
1674 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1675 			    elf_is_ifunc_reloc(rel->r_info)) == ifuncs)
1676 				elf_reloc_local(lf, base, rel, ELF_RELOC_REL,
1677 				    elf_obj_lookup);
1678 		}
1679 	}
1680 
1681 	/* Perform relocations with addend if there are any: */
1682 	for (i = 0; i < ef->nrelatab; i++) {
1683 		rela = ef->relatab[i].rela;
1684 		if (rela == NULL) {
1685 			link_elf_error(ef->lf.filename, "lost a relatab!");
1686 			return (ENOEXEC);
1687 		}
1688 		relalim = rela + ef->relatab[i].nrela;
1689 		base = findbase(ef, ef->relatab[i].sec);
1690 		if (base == 0) {
1691 			link_elf_error(ef->lf.filename, "lost base for reltab");
1692 			return (ENOEXEC);
1693 		}
1694 		for ( ; rela < relalim; rela++) {
1695 			symidx = ELF_R_SYM(rela->r_info);
1696 			if (symidx >= ef->ddbsymcnt)
1697 				continue;
1698 			sym = ef->ddbsymtab + symidx;
1699 			/* Only do local relocs */
1700 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1701 				continue;
1702 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1703 			    elf_is_ifunc_reloc(rela->r_info)) == ifuncs)
1704 				elf_reloc_local(lf, base, rela, ELF_RELOC_RELA,
1705 				    elf_obj_lookup);
1706 		}
1707 	}
1708 	return (0);
1709 }
1710 
1711 static long
1712 link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
1713 {
1714     elf_file_t ef = (elf_file_t)lf;
1715 
1716     *symtab = ef->ddbsymtab;
1717 
1718     if (*symtab == NULL)
1719         return (0);
1720 
1721     return (ef->ddbsymcnt);
1722 }
1723 
1724 static long
1725 link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
1726 {
1727     elf_file_t ef = (elf_file_t)lf;
1728 
1729     *strtab = ef->ddbstrtab;
1730 
1731     if (*strtab == NULL)
1732         return (0);
1733 
1734     return (ef->ddbstrcnt);
1735 }
1736