xref: /netbsd/lib/librumpuser/rumpuser_dl.c (revision abd1934e)
1 /*      $NetBSD: rumpuser_dl.c,v 1.34 2022/04/19 20:32:17 rillig Exp $	*/
2 
3 /*
4  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Load all module link sets and feed symbol table to the kernel.
30  * Called during rump bootstrap.
31  */
32 
33 /*
34  * Solaris libelf.h doesn't support _FILE_OFFSET_BITS=64.  Luckily,
35  * for this module it doesn't matter.
36  */
37 #if defined(__sun__)
38 #define RUMPUSER_NO_FILE_OFFSET_BITS
39 #endif
40 #include "rumpuser_port.h"
41 
42 #if !defined(lint)
43 __RCSID("$NetBSD: rumpuser_dl.c,v 1.34 2022/04/19 20:32:17 rillig Exp $");
44 #endif /* !lint */
45 
46 #include <sys/types.h>
47 #include <sys/time.h>
48 #include <sys/evcnt.h>
49 
50 #include <assert.h>
51 
52 #include <dlfcn.h>
53 #include <errno.h>
54 #include <fcntl.h>
55 #include <stdint.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <unistd.h>
60 
61 #include <rump/rumpuser.h>
62 
63 #if defined(__ELF__) && defined(HAVE_DLINFO)
64 #include <elf.h>
65 #include <link.h>
66 
67 static size_t symtabsize = 0, strtabsize = 0;
68 static size_t symtaboff = 0, strtaboff = 0;
69 static uint8_t *symtab = NULL;
70 static char *strtab = NULL;
71 static unsigned char eident;
72 
73 /* nb5 compat */
74 #ifndef Elf_Symindx
75 #define Elf_Symindx uint32_t
76 #endif
77 
/*
 * Make sure at least `required' bytes are available in `store' beyond
 * the first `storeoff' bytes, growing the allocation when necessary.
 *
 *	store		current buffer, or NULL if none was allocated yet
 *	storesize	in/out: total size of the buffer in bytes; only
 *			updated when the buffer is actually grown
 *	storeoff	number of leading bytes already in use
 *	required	number of bytes needed starting at storeoff
 *
 * Returns the (possibly moved) buffer.  If the buffer is already large
 * enough it is returned untouched, so a NULL `store' with `required'
 * equal to zero yields NULL.  If growing fails, NULL is returned and the
 * old buffer is released: callers assign the result straight back to
 * their only copy of the pointer, so the old block would otherwise
 * become unreachable.
 */
static void *
reservespace(void *store, size_t *storesize,
	size_t storeoff, size_t required)
{
	size_t chunk, newsize;
	void *grown;

	assert(storeoff <= *storesize);
	chunk = *storesize - storeoff;

	if (chunk >= required)
		return store;

	/*
	 * The new size is storeoff + required.  Refuse requests where
	 * that sum wraps around; a wrapped size would shrink the buffer
	 * and the store below would land outside of it.
	 */
	if (required > SIZE_MAX - storeoff) {
		free(store);
		return NULL;
	}
	newsize = storeoff + required;

	grown = realloc(store, newsize);
	if (grown == NULL) {
		/* realloc left the old block allocated; do not leak it */
		free(store);
		return NULL;
	}

	/* zero the first byte of the newly available area */
	*((uint8_t *)grown + storeoff) = '\0';
	*storesize = newsize;

	return grown;
}
100 
101 /*
102  * Macros to make handling elf32/64 in the code a little saner.
103  */
104 
/*
 * All accessors below pick the Elf32 or Elf64 layout at run time based
 * on the file-scope `eident' value (anything but ELFCLASS32 is treated
 * as 64-bit).  `thevar' is a member designator and is pasted verbatim;
 * the other arguments are ordinary expressions.
 */

/* result = ((ElfNN_Dyn *)base)[n].thevar */
#define DYNn_GETMEMBER(base, n, thevar, result)				\
do {									\
	if (eident == ELFCLASS32) {					\
		const Elf32_Dyn *dyn = (base);				\
		/*LINTED*/						\
		result = dyn[n].thevar;					\
	} else {							\
		const Elf64_Dyn *dyn = (base);				\
		/*LINTED*/						\
		result = dyn[n].thevar;					\
	}								\
} while (0)

/* result = ((ElfNN_Sym *)base)[n].thevar */
#define SYMn_GETMEMBER(base, n, thevar, result)				\
do {									\
	if (eident == ELFCLASS32) {					\
		const Elf32_Sym *sym = (base);				\
		/*LINTED*/						\
		result = sym[n].thevar;					\
	} else {							\
		const Elf64_Sym *sym = (base);				\
		/*LINTED*/						\
		result = sym[n].thevar;					\
	}								\
} while (0)

/* ((ElfNN_Sym *)base)[n].thevar = value */
#define SYMn_SETMEMBER(base, n, thevar, value)				\
do {									\
	if (eident == ELFCLASS32) {					\
		Elf32_Sym *sym = (base);				\
		/*LINTED*/						\
		sym[n].thevar = (value);				\
	} else {							\
		Elf64_Sym *sym = (base);				\
		/*LINTED*/						\
		sym[n].thevar = (value);				\
	}								\
} while (0)

/* size in bytes of one symbol table entry for the current ELF class */
#define SYM_GETSIZE() ((eident==ELFCLASS32)?sizeof(Elf32_Sym):sizeof(Elf64_Sym))
145 
146 /*
147  * On NetBSD, the dynamic section pointer values seem to be relative to
148  * the address the dso is mapped at.  On glibc, they seem to contain
149  * the absolute address.  I couldn't find anything definite from a quick
150  * read of the standard and therefore I will not go and figure beyond ifdef.
151  * On Solaris and DragonFly / FreeBSD, the main object works differently
152  * ... uuuuh.
153  */
/*
 * adjptr(map, ptr): convert a pointer-valued dynamic section entry of
 * the object described by `map' into an address that can be
 * dereferenced.  The Solaris/DragonFly/FreeBSD variant additionally
 * reads a variable named `ismainobj' at the expansion site.
 */
#if defined(__GLIBC__) && !defined(__mips__)
#define adjptr(_map_, _ptr_) ((void *)(_ptr_))
#elif defined(__sun__) || defined(__DragonFly__) || defined(__FreeBSD__)
#define adjptr(_map_, _ptr_) \
    (ismainobj ? (void *)(_ptr_) : (void *)((_map_)->l_addr + (_ptr_)))
#else
/* NetBSD and some others, e.g. Linux + musl */
#define adjptr(_map_, _ptr_) ((void *)((_map_)->l_addr + (_ptr_)))
#endif
163 
164 static int
getsymbols(struct link_map * map,int ismainobj)165 getsymbols(struct link_map *map, int ismainobj)
166 {
167 	char *str_base;
168 	void *syms_base = NULL; /* XXXgcc */
169 	size_t curstrsize;
170 	const void *ed_base;
171 	uint64_t ed_tag;
172 	size_t cursymcount;
173 	unsigned i;
174 
175 	if (map->l_addr) {
176 		if (memcmp((void *)map->l_addr, ELFMAG, SELFMAG) != 0)
177 			return ENOEXEC;
178 		eident = *(unsigned char *)(map->l_addr + EI_CLASS);
179 		if (eident != ELFCLASS32 && eident != ELFCLASS64)
180 			return ENOEXEC;
181 	}
182 
183 	/*
184 	 * ok, we probably have only the main object.  instead of going
185 	 * to disk and reading the ehdr, just try to guess the size.
186 	 */
187 	if (eident == 0) {
188 		if (/*CONSTCOND*/sizeof(void *) == 4)
189 			eident = ELFCLASS32;
190 		else
191 			eident = ELFCLASS64;
192 	}
193 
194 	/*
195 	 * Find symtab and strtab and their sizes.
196 	 */
197 	str_base = NULL;
198 	curstrsize = 0;
199 	cursymcount = 0;
200 	ed_base = map->l_ld;
201 	DYNn_GETMEMBER(ed_base, 0, d_tag, ed_tag);
202 	for (i = 0; ed_tag != DT_NULL;) {
203 		uintptr_t edptr;
204 		size_t edval;
205 		Elf_Symindx *hashtab;
206 
207 		switch (ed_tag) {
208 		case DT_SYMTAB:
209 			DYNn_GETMEMBER(ed_base, i, d_un.d_ptr, edptr);
210 			syms_base = adjptr(map, edptr);
211 			break;
212 		case DT_STRTAB:
213 			DYNn_GETMEMBER(ed_base, i, d_un.d_ptr, edptr);
214 			str_base = adjptr(map, edptr);
215 			break;
216 		case DT_STRSZ:
217 			DYNn_GETMEMBER(ed_base, i, d_un.d_val, edval);
218 			curstrsize = edval;
219 			break;
220 		case DT_HASH:
221 			DYNn_GETMEMBER(ed_base, i, d_un.d_ptr, edptr);
222 			hashtab = (Elf_Symindx *)adjptr(map, edptr);
223 			cursymcount = hashtab[1];
224 			break;
225 #ifdef DT_GNU_HASH
226 		/*
227 		 * DT_GNU_HASH is a bit more complicated than DT_HASH
228 		 * in this regard since apparently there is no field
229 		 * telling us the total symbol count.  Instead, we look
230 		 * for the last valid hash bucket and add its chain length
231 		 * to the bucket's base index.
232 		 */
233 		case DT_GNU_HASH: {
234 			Elf32_Word nbuck, symndx, maskwords, maxchain = 0;
235 			Elf32_Word *gnuhash, *buckets, *ptr;
236 			int bi;
237 
238 			DYNn_GETMEMBER(ed_base, i, d_un.d_ptr, edptr);
239 			gnuhash = (Elf32_Word *)adjptr(map, edptr);
240 
241 			nbuck = gnuhash[0];
242 			symndx = gnuhash[1];
243 			maskwords = gnuhash[2];
244 
245 			/*
246 			 * First, find the last valid bucket and grab its index
247 			 */
248 			if (eident == ELFCLASS64)
249 				maskwords *= 2; /* sizeof(*buckets) == 4 */
250 			buckets = gnuhash + 4 + maskwords;
251 			for (bi = nbuck-1; bi >= 0; bi--) {
252 				if (buckets[bi] != 0) {
253 					maxchain = buckets[bi];
254 					break;
255 				}
256 			}
257 			if (maxchain == 0 || maxchain < symndx)
258 				break;
259 
260 			/*
261 			 * Then, traverse the last chain and count symbols.
262 			 */
263 
264 			cursymcount = maxchain;
265 			ptr = buckets + nbuck + (maxchain - symndx);
266 			do {
267 				cursymcount++;
268 			} while ((*ptr++ & 1) == 0);
269 		}
270 			break;
271 #endif
272 		case DT_SYMENT:
273 			DYNn_GETMEMBER(ed_base, i, d_un.d_val, edval);
274 			assert(edval == SYM_GETSIZE());
275 			break;
276 		default:
277 			break;
278 		}
279 		i++;
280 		DYNn_GETMEMBER(ed_base, i, d_tag, ed_tag);
281 	}
282 
283 	if (str_base == NULL || syms_base == NULL ||
284 	    curstrsize == 0 || cursymcount == 0) {
285 		fprintf(stderr, "could not find strtab, symtab or their sizes "
286 		    "in %s\n", map->l_name);
287 		return ENOEXEC;
288 	}
289 
290 	/*
291 	 * Make sure we have enough space for the contents of the symbol
292 	 * and string tables we are currently processing.  The total used
293 	 * space will be smaller due to undefined symbols we are not
294 	 * interested in.
295 	 */
296 	symtab = reservespace(symtab, &symtabsize,
297 	    symtaboff, cursymcount * SYM_GETSIZE());
298 	strtab = reservespace(strtab, &strtabsize, strtaboff, curstrsize);
299 	if (symtab == NULL || strtab == NULL) {
300 		fprintf(stderr, "failed to reserve memory");
301 		return ENOMEM;
302 	}
303 
304 	/* iterate over all symbols in current symtab */
305 	for (i = 0; i < cursymcount; i++) {
306 		const char *cursymname;
307 		int shndx, name;
308 		uintptr_t value;
309 		void *csym;
310 
311 		SYMn_GETMEMBER(syms_base, i, st_shndx, shndx);
312 		SYMn_GETMEMBER(syms_base, i, st_value, value);
313 		if (shndx == SHN_UNDEF || value == 0)
314 			continue;
315 
316 		/* get symbol name */
317 		SYMn_GETMEMBER(syms_base, i, st_name, name);
318 		cursymname = name + str_base;
319 
320 		/*
321 		 * Only accept symbols which are decidedly in
322 		 * the rump kernel namespace.
323 		 * XXX: quirks, but they wouldn't matter here
324 		 */
325 		if (strncmp(cursymname, "rump", 4) != 0 &&
326 		    strncmp(cursymname, "RUMP", 4) != 0 &&
327 		    strncmp(cursymname, "__", 2) != 0) {
328 			continue;
329 		}
330 
331 		memcpy(symtab + symtaboff,
332 		    (const uint8_t *)syms_base + i*SYM_GETSIZE(),SYM_GETSIZE());
333 
334 		/*
335 		 * set name to point at new strtab, offset symbol value
336 		 * with lib base address.
337 		 */
338 		csym = symtab + symtaboff;
339 		SYMn_SETMEMBER(csym, 0, st_name, strtaboff);
340 		SYMn_GETMEMBER(csym, 0, st_value, value);
341 		SYMn_SETMEMBER(csym, 0, st_value,(intptr_t)(value+map->l_addr));
342 		symtaboff += SYM_GETSIZE();
343 
344 		strcpy(strtab + strtaboff, cursymname);
345 		strtaboff += strlen(cursymname)+1;
346 	}
347 
348 	return 0;
349 }
350 
351 static void
process_object(void * handle,rump_modinit_fn domodinit,rump_compload_fn docompload,rump_evcntattach_fn doevcntattach)352 process_object(void *handle,
353 	rump_modinit_fn domodinit, rump_compload_fn docompload,
354 	rump_evcntattach_fn doevcntattach)
355 {
356 	const struct modinfo *const *mi_start, *const *mi_end;
357 	struct rump_component *const *rc, *const *rc_end;
358 
359 	struct sysctllog;
360 	typedef void sysctl_setup_func(struct sysctllog **);
361 	sysctl_setup_func *const *sfp, *const *sfp_end;
362 
363 	struct evcnt *const *evp, *const *evp_end;
364 
365 	mi_start = dlsym(handle, "__start_link_set_modules");
366 	mi_end = dlsym(handle, "__stop_link_set_modules");
367 	if (mi_start && mi_end)
368 		domodinit(mi_start, (size_t)(mi_end-mi_start));
369 
370 	rc = dlsym(handle, "__start_link_set_rump_components");
371 	rc_end = dlsym(handle, "__stop_link_set_rump_components");
372 	if (rc && rc_end) {
373 		for (; rc < rc_end; rc++)
374 			docompload(*rc);
375 		assert(rc == rc_end);
376 	}
377 
378 	/* handle link_set_sysctl_funcs */
379 	sfp = dlsym(handle, "__start_link_set_sysctl_funcs");
380 	sfp_end = dlsym(handle, "__stop_link_set_sysctl_funcs");
381 	if (sfp && sfp_end) {
382 		for (; sfp < sfp_end; sfp++)
383 			(**sfp)(NULL);
384 		assert(sfp == sfp_end);
385 	}
386 
387 	/* handle link_set_evcnts */
388 	evp = dlsym(handle, "__start_link_set_evcnts");
389 	evp_end = dlsym(handle, "__stop_link_set_evcnts");
390 	if (evp && evp_end) {
391 		for (; evp < evp_end; evp++)
392 			doevcntattach(*evp);
393 		assert(evp == evp_end);
394 	}
395 }
396 
397 /*
398  * Get the linkmap from the dynlinker.  Try to load kernel modules
399  * from all objects in the linkmap.
400  */
401 void
rumpuser_dl_bootstrap(rump_modinit_fn domodinit,rump_symload_fn symload,rump_compload_fn compload,rump_evcntattach_fn doevcntattach)402 rumpuser_dl_bootstrap(rump_modinit_fn domodinit,
403 	rump_symload_fn symload, rump_compload_fn compload,
404 	rump_evcntattach_fn doevcntattach)
405 {
406 	struct link_map *map, *origmap, *mainmap;
407 	void *mainhandle;
408 	int error;
409 
410 	mainhandle = dlopen(NULL, RTLD_NOW);
411 	/* Will be null if statically linked so just return */
412 	if (mainhandle == NULL)
413 		return;
414 	if (dlinfo(mainhandle, RTLD_DI_LINKMAP, &mainmap) == -1) {
415 		fprintf(stderr, "warning: rumpuser module bootstrap "
416 		    "failed: %s\n", dlerror());
417 		return;
418 	}
419 	origmap = mainmap;
420 
421 	/*
422 	 * Use a heuristic to determine if we are static linked.
423 	 * A dynamically linked binary should always have at least
424 	 * two objects: itself and ld.so.
425 	 *
426 	 * In a statically linked binary with glibc the linkmap
427 	 * contains some "info" that leads to a segfault.  Since we
428 	 * can't really do anything useful in here without ld.so, just
429 	 * simply bail and let the symbol references in librump do the
430 	 * right things.
431 	 */
432 	if (origmap->l_next == NULL && origmap->l_prev == NULL) {
433 		dlclose(mainhandle);
434 		return;
435 	}
436 
437 	/*
438 	 * Process last->first because that's the most probable
439 	 * order for dependencies
440 	 */
441 	for (; origmap->l_next; origmap = origmap->l_next)
442 		continue;
443 
444 	/*
445 	 * Build symbol table to hand to the rump kernel.  Do this by
446 	 * iterating over all rump libraries and collecting symbol
447 	 * addresses and relocation info.
448 	 */
449 	error = 0;
450 	for (map = origmap; map && !error; map = map->l_prev) {
451 		if (strstr(map->l_name, "librump") != NULL || map == mainmap)
452 			error = getsymbols(map, map == mainmap);
453 	}
454 
455 	if (error == 0) {
456 		void *trimmedsym, *trimmedstr;
457 
458 		/*
459 		 * Allocate optimum-sized memory for storing tables
460 		 * and feed to kernel.  If memory allocation fails,
461 		 * just give the ones with extra context (although
462 		 * I'm pretty sure we'll die moments later due to
463 		 * memory running out).
464 		 */
465 		if ((trimmedsym = malloc(symtaboff)) != NULL) {
466 			memcpy(trimmedsym, symtab, symtaboff);
467 		} else {
468 			trimmedsym = symtab;
469 			symtab = NULL;
470 		}
471 		if ((trimmedstr = malloc(strtaboff)) != NULL) {
472 			memcpy(trimmedstr, strtab, strtaboff);
473 		} else {
474 			trimmedstr = strtab;
475 			strtab = NULL;
476 		}
477 		symload(trimmedsym, symtaboff, trimmedstr, strtaboff);
478 	}
479 	free(symtab);
480 	free(strtab);
481 
482 	/*
483 	 * Next, load modules and components.
484 	 *
485 	 * Simply loop through all objects, ones unrelated to rump kernels
486 	 * will not contain link_set_rump_components (well, not including
487 	 * "sabotage", but that needs to be solved at another level anyway).
488 	 */
489 	for (map = origmap; map; map = map->l_prev) {
490 		void *handle;
491 
492 		if (map == mainmap) {
493 			handle = mainhandle;
494 		} else {
495 			handle = dlopen(map->l_name, RTLD_LAZY);
496 			if (handle == NULL)
497 				continue;
498 		}
499 		process_object(handle, domodinit, compload, doevcntattach);
500 		if (map != mainmap)
501 			dlclose(handle);
502 	}
503 }
504 #else
505 /*
506  * no dynamic linking supported
507  */
508 void
rumpuser_dl_bootstrap(rump_modinit_fn domodinit,rump_symload_fn symload,rump_compload_fn compload,rump_evcntattach_fn doevcntattach)509 rumpuser_dl_bootstrap(rump_modinit_fn domodinit,
510 	rump_symload_fn symload, rump_compload_fn compload,
511 	rump_evcntattach_fn doevcntattach)
512 {
513 
514 	return;
515 }
516 #endif
517