xref: /netbsd/sys/kern/kern_ksyms.c (revision 50cde1ed)
1 /*	$NetBSD: kern_ksyms.c,v 1.108 2023/02/21 11:40:00 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software developed for The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 2001, 2003 Anders Magnusson (ragge@ludd.luth.se).
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. The name of the author may not be used to endorse or promote products
45  *    derived from this software without specific prior written permission
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57  */
58 
59 /*
60  * Code to deal with in-kernel symbol table management + /dev/ksyms.
61  *
62  * For each loaded module the symbol table info is kept track of by a
63  * struct, placed in a circular list. The first entry is the kernel
64  * symbol table.
65  */
66 
67 /*
68  * TODO:
69  *
70  *	Add support for mmap, poll.
71  *	Constify tables.
72  *	Constify db_symtab and move it to .rodata.
73  */
74 
75 #include <sys/cdefs.h>
76 __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.108 2023/02/21 11:40:00 riastradh Exp $");
77 
78 #if defined(_KERNEL) && defined(_KERNEL_OPT)
79 #include "opt_copy_symtab.h"
80 #include "opt_ddb.h"
81 #include "opt_dtrace.h"
82 #endif
83 
84 #define _KSYMS_PRIVATE
85 
86 #include <sys/param.h>
87 #include <sys/queue.h>
88 #include <sys/exec.h>
89 #include <sys/file.h>
90 #include <sys/filedesc.h>
91 #include <sys/kauth.h>
92 #include <sys/systm.h>
93 #include <sys/conf.h>
94 #include <sys/kmem.h>
95 #include <sys/proc.h>
96 #include <sys/atomic.h>
97 #include <sys/ksyms.h>
98 #include <sys/kernel.h>
99 #include <sys/intr.h>
100 #include <sys/pserialize.h>
101 #include <sys/stat.h>
102 
103 #include <uvm/uvm_extern.h>
104 
105 #ifdef DDB
106 #include <ddb/db_output.h>
107 #endif
108 
109 #include "ksyms.h"
110 #if NKSYMS > 0
111 #include "ioconf.h"
112 #endif
113 
/*
 * Snapshot of the symbol tables.  The code that fills and consumes
 * these fields (ksyms_snapshot_alloc/ksyms_snapshot_release) is not
 * part of this view, so the per-field notes are assumptions drawn from
 * the field names and the ksyms_snapshot_alloc() prototype -- TODO
 * confirm against those functions.
 */
struct ksyms_snapshot {
	uint64_t		ks_refcnt;	/* presumably a reference count */
	uint64_t		ks_gen;		/* presumably the generation it was taken at */
	struct uvm_object	*ks_uobj;	/* presumably the backing UVM object */
	size_t			ks_size;	/* presumably the snapshot size in bytes */
	dev_t			ks_dev;		/* presumably the device it was opened as */
	int			ks_maxlen;	/* presumably ksyms_maxlen at snapshot time */
};
122 
/* Number of entries in the statically allocated ksyms_nmap[]. */
#define KSYMS_MAX_ID	98304
#ifdef KDTRACE_HOOKS
static uint32_t ksyms_nmap[KSYMS_MAX_ID];	/* sorted symbol table map */
#else
/* Without KDTRACE_HOOKS the kernel table is registered with no map. */
static uint32_t *ksyms_nmap = NULL;
#endif

/* Longest symbol name seen by addsymtab(), counting the trailing NUL. */
static int ksyms_maxlen;
/* Set by ksyms_init() once the lock, condvar and pserialize are created. */
static bool ksyms_initted;
/* Set by addsymtab() once at least one table has been published. */
static bool ksyms_loaded;
static kmutex_t ksyms_lock __cacheline_aligned;
/* Symbol table record used for the "netbsd" image itself. */
static struct ksyms_symtab kernel_symtab;
/* ksyms_modunload() waits on this while ksyms_snapshotting is non-NULL. */
static kcondvar_t ksyms_cv;
static struct lwp *ksyms_snapshotting;
/* Cached snapshot; detached and released on module load and unload. */
static struct ksyms_snapshot *ksyms_snapshot;
static uint64_t ksyms_snapshot_gen;
/* Used by ksyms_modunload() to wait out pserialized readers. */
static pserialize_t ksyms_psz __read_mostly;
140 
141 static void ksyms_hdr_init(const void *);
142 static void ksyms_sizes_calc(void);
143 static struct ksyms_snapshot *ksyms_snapshot_alloc(int, size_t, dev_t,
144     uint64_t);
145 static void ksyms_snapshot_release(struct ksyms_snapshot *);
146 
147 #ifdef KSYMS_DEBUG
148 #define	FOLLOW_CALLS		1
149 #define	FOLLOW_MORE_CALLS	2
150 #define	FOLLOW_DEVKSYMS		4
151 static int ksyms_debug;
152 #endif
153 
154 #define		SYMTAB_FILLER	"|This is the symbol table!"
155 
156 #ifdef makeoptions_COPY_SYMTAB
157 extern char db_symtab[];
158 extern int db_symtabsize;
159 #endif
160 
161 /*
162  * used by savecore(8) so non-static
163  */
164 struct ksyms_hdr ksyms_hdr;
165 int ksyms_symsz;
166 int ksyms_strsz;
167 int ksyms_ctfsz;	/* this is not currently used by savecore(8) */
168 TAILQ_HEAD(ksyms_symtab_queue, ksyms_symtab) ksyms_symtabs =
169     TAILQ_HEAD_INITIALIZER(ksyms_symtabs);
170 static struct pslist_head ksyms_symtabs_psz = PSLIST_INITIALIZER;
171 
/*
 * Check that both a symbol table and a string table were located.
 *
 * Returns 1 when both pointers are non-NULL and 0 otherwise.  DIAGNOSTIC
 * and DEBUG kernels additionally report which table is missing.
 */
static int
ksyms_verify(const void *symstart, const void *strstart)
{
	const int have_sym = (symstart != NULL);
	const int have_str = (strstart != NULL);

#if defined(DIAGNOSTIC) || defined(DEBUG)
	if (!have_sym)
		printf("ksyms: Symbol table not found\n");
	if (!have_str)
		printf("ksyms: String table not found\n");
	if (!have_sym || !have_str)
		printf("ksyms: Perhaps the kernel is stripped?\n");
#endif
	return have_sym && have_str;
}
187 
188 /*
189  * Finds a certain symbol name in a certain symbol table.
190  */
191 static Elf_Sym *
findsym(const char * name,struct ksyms_symtab * table,int type)192 findsym(const char *name, struct ksyms_symtab *table, int type)
193 {
194 	Elf_Sym *sym, *maxsym;
195 	int low, mid, high, nglob;
196 	char *str, *cmp;
197 
198 	sym = table->sd_symstart;
199 	str = table->sd_strstart - table->sd_usroffset;
200 	nglob = table->sd_nglob;
201 	low = 0;
202 	high = nglob;
203 
204 	/*
205 	 * Start with a binary search of all global symbols in this table.
206 	 * Global symbols must have unique names.
207 	 */
208 	while (low < high) {
209 		mid = (low + high) >> 1;
210 		cmp = sym[mid].st_name + str;
211 		if (cmp[0] < name[0] || strcmp(cmp, name) < 0) {
212 			low = mid + 1;
213 		} else {
214 			high = mid;
215 		}
216 	}
217 	KASSERT(low == high);
218 	if (__predict_true(low < nglob &&
219 	    strcmp(sym[low].st_name + str, name) == 0)) {
220 		KASSERT(ELF_ST_BIND(sym[low].st_info) == STB_GLOBAL);
221 		return &sym[low];
222 	}
223 
224 	/*
225 	 * Perform a linear search of local symbols (rare).  Many local
226 	 * symbols with the same name can exist so are not included in
227 	 * the binary search.
228 	 */
229 	if (type != KSYMS_EXTERN) {
230 		maxsym = sym + table->sd_symsize / sizeof(Elf_Sym);
231 		for (sym += nglob; sym < maxsym; sym++) {
232 			if (strcmp(name, sym->st_name + str) == 0) {
233 				return sym;
234 			}
235 		}
236 	}
237 	return NULL;
238 }
239 
240 /*
241  * The "attach" is in reality done in ksyms_init().
242  */
243 #if NKSYMS > 0
/*
 * ksyms can be loaded even if the kernel has a missing "pseudo-device ksyms"
 * statement because ddb and modules require it. Fixing it properly requires
 * fixing config to warn about required, but missing pseudo-devices. For now,
 * if we don't have the pseudo-device we don't need the attach function; this
 * is fine, as it does nothing.
 */
void
ksymsattach(int arg)
{
	/* Intentionally empty: the real setup is performed by ksyms_init(). */
}
255 #endif
256 
257 void
ksyms_init(void)258 ksyms_init(void)
259 {
260 
261 #ifdef makeoptions_COPY_SYMTAB
262 	if (!ksyms_loaded &&
263 	    strncmp(db_symtab, SYMTAB_FILLER, sizeof(SYMTAB_FILLER))) {
264 		ksyms_addsyms_elf(db_symtabsize, db_symtab,
265 		    db_symtab + db_symtabsize);
266 	}
267 #endif
268 
269 	if (!ksyms_initted) {
270 		mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE);
271 		cv_init(&ksyms_cv, "ksyms");
272 		ksyms_psz = pserialize_create();
273 		ksyms_initted = true;
274 	}
275 }
276 
/*
 * Are any symbols available?
 *
 * Returns the ksyms_loaded flag, which addsymtab() sets once it has
 * published a symbol table.  The flag is read without taking any lock.
 */
bool
ksyms_available(void)
{

	return ksyms_loaded;
}
286 
287 /*
288  * Add a symbol table.
289  * This is intended for use when the symbol table and its corresponding
290  * string table are easily available.  If they are embedded in an ELF
291  * image, use addsymtab_elf() instead.
292  *
293  * name - Symbol's table name.
294  * symstart, symsize - Address and size of the symbol table.
295  * strstart, strsize - Address and size of the string table.
296  * tab - Symbol table to be updated with this information.
297  * newstart - Address to which the symbol table has to be copied during
298  *            shrinking.  If NULL, it is not moved.
299  */
300 static const char *addsymtab_strstart;
301 
302 static int
addsymtab_compar(const void * a,const void * b)303 addsymtab_compar(const void *a, const void *b)
304 {
305 	const Elf_Sym *sa, *sb;
306 
307 	sa = a;
308 	sb = b;
309 
310 	/*
311 	 * Split the symbol table into two, with globals at the start
312 	 * and locals at the end.
313 	 */
314 	if (ELF_ST_BIND(sa->st_info) != ELF_ST_BIND(sb->st_info)) {
315 		if (ELF_ST_BIND(sa->st_info) == STB_GLOBAL) {
316 			return -1;
317 		}
318 		if (ELF_ST_BIND(sb->st_info) == STB_GLOBAL) {
319 			return 1;
320 		}
321 	}
322 
323 	/* Within each band, sort by name. */
324 	return strcmp(sa->st_name + addsymtab_strstart,
325 	    sb->st_name + addsymtab_strstart);
326 }
327 
328 static void
addsymtab(const char * name,void * symstart,size_t symsize,void * strstart,size_t strsize,struct ksyms_symtab * tab,void * newstart,void * ctfstart,size_t ctfsize,uint32_t * nmap)329 addsymtab(const char *name, void *symstart, size_t symsize,
330 	  void *strstart, size_t strsize, struct ksyms_symtab *tab,
331 	  void *newstart, void *ctfstart, size_t ctfsize, uint32_t *nmap)
332 {
333 	Elf_Sym *sym, *nsym, ts;
334 	int i, j, n, nglob;
335 	char *str;
336 	int nsyms = symsize / sizeof(Elf_Sym);
337 	int s;
338 
339 	/* Sanity check for pre-allocated map table used during startup. */
340 	if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) {
341 		printf("kern_ksyms: ERROR %d > %d, increase KSYMS_MAX_ID\n",
342 		    nsyms, KSYMS_MAX_ID);
343 
344 		/* truncate for now */
345 		nsyms = KSYMS_MAX_ID - 1;
346 	}
347 
348 	tab->sd_symstart = symstart;
349 	tab->sd_symsize = symsize;
350 	tab->sd_strstart = strstart;
351 	tab->sd_strsize = strsize;
352 	tab->sd_name = name;
353 	tab->sd_minsym = UINTPTR_MAX;
354 	tab->sd_maxsym = 0;
355 	tab->sd_usroffset = 0;
356 	tab->sd_ctfstart = ctfstart;
357 	tab->sd_ctfsize = ctfsize;
358 	tab->sd_nmap = nmap;
359 	tab->sd_nmapsize = nsyms;
360 #ifdef KSYMS_DEBUG
361 	printf("newstart %p sym %p ksyms_symsz %zu str %p strsz %zu send %p\n",
362 	    newstart, symstart, symsize, strstart, strsize,
363 	    tab->sd_strstart + tab->sd_strsize);
364 #endif
365 
366 	if (nmap) {
367 		memset(nmap, 0, nsyms * sizeof(uint32_t));
368 	}
369 
370 	/* Pack symbol table by removing all file name references. */
371 	sym = tab->sd_symstart;
372 	nsym = (Elf_Sym *)newstart;
373 	str = tab->sd_strstart;
374 	nglob = 0;
375 	for (i = n = 0; i < nsyms; i++) {
376 
377 		/*
378 		 * This breaks CTF mapping, so don't do it when
379 		 * DTrace is enabled.
380 		 */
381 #ifndef KDTRACE_HOOKS
382 		/*
383 		 * Remove useless symbols.
384 		 * Should actually remove all typeless symbols.
385 		 */
386 		if (sym[i].st_name == 0)
387 			continue; /* Skip nameless entries */
388 		if (sym[i].st_shndx == SHN_UNDEF)
389 			continue; /* Skip external references */
390 		if (ELF_ST_TYPE(sym[i].st_info) == STT_FILE)
391 			continue; /* Skip filenames */
392 		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
393 		    sym[i].st_value == 0 &&
394 		    strcmp(str + sym[i].st_name, "*ABS*") == 0)
395 			continue; /* XXX */
396 		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
397 		    strcmp(str + sym[i].st_name, "gcc2_compiled.") == 0)
398 			continue; /* XXX */
399 #endif
400 
401 		/* Save symbol. Set it as an absolute offset */
402 		nsym[n] = sym[i];
403 
404 #ifdef KDTRACE_HOOKS
405 		if (nmap != NULL) {
406 			/*
407 			 * Save the size, replace it with the symbol id so
408 			 * the mapping can be done after the cleanup and sort.
409 			 */
410 			nmap[i] = nsym[n].st_size;
411 			nsym[n].st_size = i + 1;	/* zero is reserved */
412 		}
413 #endif
414 
415 		if (sym[i].st_shndx != SHN_ABS) {
416 			nsym[n].st_shndx = SHBSS;
417 		} else {
418 			/* SHN_ABS is a magic value, don't overwrite it */
419 		}
420 
421 		j = strlen(nsym[n].st_name + str) + 1;
422 		if (j > ksyms_maxlen)
423 			ksyms_maxlen = j;
424 		nglob += (ELF_ST_BIND(nsym[n].st_info) == STB_GLOBAL);
425 
426 		/* Compute min and max symbols. */
427 		if (strcmp(str + sym[i].st_name, "*ABS*") != 0
428 		    && ELF_ST_TYPE(nsym[n].st_info) != STT_NOTYPE) {
429 			if (nsym[n].st_value < tab->sd_minsym) {
430 				tab->sd_minsym = nsym[n].st_value;
431 			}
432 			if (nsym[n].st_value > tab->sd_maxsym) {
433 				tab->sd_maxsym = nsym[n].st_value;
434 			}
435 		}
436 		n++;
437 	}
438 
439 	/* Fill the rest of the record, and sort the symbols. */
440 	tab->sd_symstart = nsym;
441 	tab->sd_symsize = n * sizeof(Elf_Sym);
442 	tab->sd_nglob = nglob;
443 
444 	addsymtab_strstart = str;
445 	if (kheapsort(nsym, n, sizeof(Elf_Sym), addsymtab_compar, &ts) != 0)
446 		panic("addsymtab");
447 
448 #ifdef KDTRACE_HOOKS
449 	/*
450 	 * Build the mapping from original symbol id to new symbol table.
451 	 * Deleted symbols will have a zero map, indices will be one based
452 	 * instead of zero based.
453 	 * Resulting map is sd_nmap[original_index] = new_index + 1
454 	 */
455 	if (nmap != NULL) {
456 		int new;
457 		for (new = 0; new < n; new++) {
458 			uint32_t orig = nsym[new].st_size - 1;
459 			uint32_t size = nmap[orig];
460 
461 			nmap[orig] = new + 1;
462 
463 			/* restore the size */
464 			nsym[new].st_size = size;
465 		}
466 	}
467 #endif
468 
469 	KASSERT(strcmp(name, "netbsd") == 0 || mutex_owned(&ksyms_lock));
470 	KASSERT(cold || mutex_owned(&ksyms_lock));
471 
472 	/*
473 	 * Publish the symtab.  Do this at splhigh to ensure ddb never
474 	 * witnesses an inconsistent state of the queue, unless memory
475 	 * is so corrupt that we crash in PSLIST_WRITER_INSERT_AFTER or
476 	 * TAILQ_INSERT_TAIL.
477 	 */
478 	PSLIST_ENTRY_INIT(tab, sd_pslist);
479 	s = splhigh();
480 	if (TAILQ_EMPTY(&ksyms_symtabs)) {
481 		PSLIST_WRITER_INSERT_HEAD(&ksyms_symtabs_psz, tab, sd_pslist);
482 	} else {
483 		struct ksyms_symtab *last;
484 
485 		last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
486 		PSLIST_WRITER_INSERT_AFTER(last, tab, sd_pslist);
487 	}
488 	TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue);
489 	splx(s);
490 
491 	ksyms_sizes_calc();
492 	ksyms_loaded = true;
493 }
494 
495 /*
496  * Setup the kernel symbol table stuff.
497  */
498 void
ksyms_addsyms_elf(int symsize,void * start,void * end)499 ksyms_addsyms_elf(int symsize, void *start, void *end)
500 {
501 	int i, j;
502 	Elf_Shdr *shdr;
503 	char *symstart = NULL, *strstart = NULL;
504 	size_t strsize = 0;
505 	Elf_Ehdr *ehdr;
506 	char *ctfstart = NULL;
507 	size_t ctfsize = 0;
508 
509 	if (symsize <= 0) {
510 		printf("[ Kernel symbol table missing! ]\n");
511 		return;
512 	}
513 
514 	/* Sanity check */
515 	if (ALIGNED_POINTER(start, long) == 0) {
516 		printf("[ Kernel symbol table has bad start address %p ]\n",
517 		    start);
518 		return;
519 	}
520 
521 	ehdr = (Elf_Ehdr *)start;
522 
523 	/* check if this is a valid ELF header */
524 	/* No reason to verify arch type, the kernel is actually running! */
525 	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) ||
526 	    ehdr->e_ident[EI_CLASS] != ELFCLASS ||
527 	    ehdr->e_version > 1) {
528 		printf("[ Kernel symbol table invalid! ]\n");
529 		return; /* nothing to do */
530 	}
531 
532 	/* Loaded header will be scratched in addsymtab */
533 	ksyms_hdr_init(start);
534 
535 	/* Find the symbol table and the corresponding string table. */
536 	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
537 	for (i = 1; i < ehdr->e_shnum; i++) {
538 		if (shdr[i].sh_type != SHT_SYMTAB)
539 			continue;
540 		if (shdr[i].sh_offset == 0)
541 			continue;
542 		symstart = (uint8_t *)start + shdr[i].sh_offset;
543 		symsize = shdr[i].sh_size;
544 		j = shdr[i].sh_link;
545 		if (shdr[j].sh_offset == 0)
546 			continue; /* Can this happen? */
547 		strstart = (uint8_t *)start + shdr[j].sh_offset;
548 		strsize = shdr[j].sh_size;
549 		break;
550 	}
551 
552 #ifdef KDTRACE_HOOKS
553 	/* Find the CTF section */
554 	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
555 	if (ehdr->e_shstrndx != 0) {
556 		char *shstr = (uint8_t *)start +
557 		    shdr[ehdr->e_shstrndx].sh_offset;
558 		for (i = 1; i < ehdr->e_shnum; i++) {
559 #ifdef KSYMS_DEBUG
560 			printf("ksyms: checking %s\n", &shstr[shdr[i].sh_name]);
561 #endif
562 			if (shdr[i].sh_type != SHT_PROGBITS)
563 				continue;
564 			if (strncmp(".SUNW_ctf", &shstr[shdr[i].sh_name], 10)
565 			    != 0)
566 				continue;
567 			ctfstart = (uint8_t *)start + shdr[i].sh_offset;
568 			ctfsize = shdr[i].sh_size;
569 			ksyms_ctfsz = ctfsize;
570 #ifdef DEBUG
571 			aprint_normal("Found CTF at %p, size 0x%zx\n",
572 			    ctfstart, ctfsize);
573 #endif
574 			break;
575 		}
576 #ifdef DEBUG
577 	} else {
578 		printf("ksyms: e_shstrndx == 0\n");
579 #endif
580 	}
581 #endif
582 
583 	if (!ksyms_verify(symstart, strstart))
584 		return;
585 
586 	addsymtab("netbsd", symstart, symsize, strstart, strsize,
587 	    &kernel_symtab, symstart, ctfstart, ctfsize, ksyms_nmap);
588 
589 #ifdef DEBUG
590 	aprint_normal("Loaded initial symtab at %p, strtab at %p, # entries %ld\n",
591 	    kernel_symtab.sd_symstart, kernel_symtab.sd_strstart,
592 	    (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym));
593 #endif
594 
595 	/* Should be no snapshot to invalidate yet.  */
596 	KASSERT(ksyms_snapshot == NULL);
597 }
598 
599 /*
600  * Setup the kernel symbol table stuff.
601  * Use this when the address of the symbol and string tables are known;
602  * otherwise use ksyms_init with an ELF image.
603  * We need to pass a minimal ELF header which will later be completed by
604  * ksyms_hdr_init and handed off to userland through /dev/ksyms.  We use
605  * a void *rather than a pointer to avoid exposing the Elf_Ehdr type.
606  */
607 void
ksyms_addsyms_explicit(void * ehdr,void * symstart,size_t symsize,void * strstart,size_t strsize)608 ksyms_addsyms_explicit(void *ehdr, void *symstart, size_t symsize,
609     void *strstart, size_t strsize)
610 {
611 	if (!ksyms_verify(symstart, strstart))
612 		return;
613 
614 	ksyms_hdr_init(ehdr);
615 	addsymtab("netbsd", symstart, symsize, strstart, strsize,
616 	    &kernel_symtab, symstart, NULL, 0, ksyms_nmap);
617 
618 	/* Should be no snapshot to invalidate yet.  */
619 	KASSERT(ksyms_snapshot == NULL);
620 }
621 
622 /*
623  * Get the value associated with a symbol.
624  * "mod" is the module name, or null if any module.
625  * "sym" is the symbol name.
626  * "val" is a pointer to the corresponding value, if call succeeded.
627  * Returns 0 if success or ENOENT if no such entry.
628  *
629  * If symp is nonnull, caller must hold ksyms_lock or module_lock, have
630  * ksyms_opencnt nonzero, be in a pserialize read section, be in ddb
631  * with all other CPUs quiescent.
632  */
633 int
ksyms_getval_unlocked(const char * mod,const char * sym,Elf_Sym ** symp,unsigned long * val,int type)634 ksyms_getval_unlocked(const char *mod, const char *sym, Elf_Sym **symp,
635     unsigned long *val, int type)
636 {
637 	struct ksyms_symtab *st;
638 	Elf_Sym *es;
639 	int s, error = ENOENT;
640 
641 #ifdef KSYMS_DEBUG
642 	if (ksyms_debug & FOLLOW_CALLS)
643 		printf("%s: mod %s sym %s valp %p\n", __func__, mod, sym, val);
644 #endif
645 
646 	s = pserialize_read_enter();
647 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
648 	    sd_pslist) {
649 		if (mod != NULL && strcmp(st->sd_name, mod))
650 			continue;
651 		if ((es = findsym(sym, st, type)) != NULL) {
652 			*val = es->st_value;
653 			if (symp)
654 				*symp = es;
655 			error = 0;
656 			break;
657 		}
658 	}
659 	pserialize_read_exit(s);
660 	return error;
661 }
662 
663 int
ksyms_getval(const char * mod,const char * sym,unsigned long * val,int type)664 ksyms_getval(const char *mod, const char *sym, unsigned long *val, int type)
665 {
666 
667 	if (!ksyms_loaded)
668 		return ENOENT;
669 
670 	/* No locking needed -- we read the table pserialized.  */
671 	return ksyms_getval_unlocked(mod, sym, NULL, val, type);
672 }
673 
674 /*
675  * ksyms_get_mod(mod)
676  *
677  * Return the symtab for the given module name.  Caller must ensure
678  * that the module cannot be unloaded until after this returns.
679  */
680 struct ksyms_symtab *
ksyms_get_mod(const char * mod)681 ksyms_get_mod(const char *mod)
682 {
683 	struct ksyms_symtab *st;
684 	int s;
685 
686 	s = pserialize_read_enter();
687 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
688 	    sd_pslist) {
689 		if (mod != NULL && strcmp(st->sd_name, mod))
690 			continue;
691 		break;
692 	}
693 	pserialize_read_exit(s);
694 
695 	return st;
696 }
697 
698 
699 /*
700  * ksyms_mod_foreach()
701  *
702  * Iterate over the symbol table of the specified module, calling the callback
703  * handler for each symbol. Stop iterating if the handler return is non-zero.
704  *
705  */
706 
707 int
ksyms_mod_foreach(const char * mod,ksyms_callback_t callback,void * opaque)708 ksyms_mod_foreach(const char *mod, ksyms_callback_t callback, void *opaque)
709 {
710 	struct ksyms_symtab *st;
711 	Elf_Sym *sym, *maxsym;
712 	char *str;
713 	int symindx;
714 
715 	if (!ksyms_loaded)
716 		return ENOENT;
717 
718 	mutex_enter(&ksyms_lock);
719 
720 	/* find the module */
721 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
722 		if (mod != NULL && strcmp(st->sd_name, mod))
723 			continue;
724 
725 		sym = st->sd_symstart;
726 		str = st->sd_strstart - st->sd_usroffset;
727 
728 		/* now iterate through the symbols */
729 		maxsym = sym + st->sd_symsize / sizeof(Elf_Sym);
730 		for (symindx = 0; sym < maxsym; sym++, symindx++) {
731 			if (callback(str + sym->st_name, symindx,
732 			    (void *)sym->st_value,
733 			    sym->st_size,
734 			    sym->st_info,
735 			    opaque) != 0) {
736 				break;
737 			}
738 		}
739 	}
740 	mutex_exit(&ksyms_lock);
741 
742 	return 0;
743 }
744 
745 /*
746  * Get "mod" and "symbol" associated with an address.
747  * Returns 0 if success or ENOENT if no such entry.
748  *
749  * Caller must hold ksyms_lock or module_lock, have ksyms_opencnt
750  * nonzero, be in a pserialize read section, or be in ddb with all
751  * other CPUs quiescent.
752  */
753 int
ksyms_getname(const char ** mod,const char ** sym,vaddr_t v,int f)754 ksyms_getname(const char **mod, const char **sym, vaddr_t v, int f)
755 {
756 	struct ksyms_symtab *st;
757 	Elf_Sym *les, *es = NULL;
758 	vaddr_t laddr = 0;
759 	const char *lmod = NULL;
760 	char *stable = NULL;
761 	int type, i, sz;
762 
763 	if (!ksyms_loaded)
764 		return ENOENT;
765 
766 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
767 	    sd_pslist) {
768 		if (v < st->sd_minsym || v > st->sd_maxsym)
769 			continue;
770 		sz = st->sd_symsize/sizeof(Elf_Sym);
771 		for (i = 0; i < sz; i++) {
772 			les = st->sd_symstart + i;
773 			type = ELF_ST_TYPE(les->st_info);
774 
775 			if ((f & KSYMS_PROC) && (type != STT_FUNC))
776 				continue;
777 
778 			if (type == STT_NOTYPE)
779 				continue;
780 
781 			if (((f & KSYMS_ANY) == 0) &&
782 			    (type != STT_FUNC) && (type != STT_OBJECT))
783 				continue;
784 
785 			if ((les->st_value <= v) && (les->st_value > laddr)) {
786 				laddr = les->st_value;
787 				es = les;
788 				lmod = st->sd_name;
789 				stable = st->sd_strstart - st->sd_usroffset;
790 			}
791 		}
792 	}
793 	if (es == NULL)
794 		return ENOENT;
795 	if ((f & KSYMS_EXACT) && (v != es->st_value))
796 		return ENOENT;
797 	if (mod)
798 		*mod = lmod;
799 	if (sym)
800 		*sym = stable + es->st_name;
801 	return 0;
802 }
803 
804 /*
805  * Add a symbol table from a loadable module.
806  */
807 void
ksyms_modload(const char * name,void * symstart,vsize_t symsize,char * strstart,vsize_t strsize)808 ksyms_modload(const char *name, void *symstart, vsize_t symsize,
809     char *strstart, vsize_t strsize)
810 {
811 	struct ksyms_symtab *st;
812 	struct ksyms_snapshot *ks;
813 	void *nmap;
814 
815 	st = kmem_zalloc(sizeof(*st), KM_SLEEP);
816 	nmap = kmem_zalloc(symsize / sizeof(Elf_Sym) * sizeof (uint32_t),
817 			   KM_SLEEP);
818 	mutex_enter(&ksyms_lock);
819 	addsymtab(name, symstart, symsize, strstart, strsize, st, symstart,
820 	    NULL, 0, nmap);
821 	ks = ksyms_snapshot;
822 	ksyms_snapshot = NULL;
823 	mutex_exit(&ksyms_lock);
824 
825 	if (ks)
826 		ksyms_snapshot_release(ks);
827 }
828 
829 /*
830  * Remove a symbol table from a loadable module.
831  */
832 void
ksyms_modunload(const char * name)833 ksyms_modunload(const char *name)
834 {
835 	struct ksyms_symtab *st;
836 	struct ksyms_snapshot *ks;
837 	int s;
838 
839 	mutex_enter(&ksyms_lock);
840 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
841 		if (strcmp(name, st->sd_name) != 0)
842 			continue;
843 		break;
844 	}
845 	KASSERT(st != NULL);
846 
847 	/* Wait for any snapshot in progress to complete.  */
848 	while (ksyms_snapshotting)
849 		cv_wait(&ksyms_cv, &ksyms_lock);
850 
851 	/*
852 	 * Remove the symtab.  Do this at splhigh to ensure ddb never
853 	 * witnesses an inconsistent state of the queue, unless memory
854 	 * is so corrupt that we crash in TAILQ_REMOVE or
855 	 * PSLIST_WRITER_REMOVE.
856 	 */
857 	s = splhigh();
858 	TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
859 	PSLIST_WRITER_REMOVE(st, sd_pslist);
860 	splx(s);
861 
862 	/*
863 	 * And wait a grace period, in case there are any pserialized
864 	 * readers in flight.
865 	 */
866 	pserialize_perform(ksyms_psz);
867 	PSLIST_ENTRY_DESTROY(st, sd_pslist);
868 
869 	/* Recompute the ksyms sizes now that we've removed st.  */
870 	ksyms_sizes_calc();
871 
872 	/* Invalidate the global ksyms snapshot.  */
873 	ks = ksyms_snapshot;
874 	ksyms_snapshot = NULL;
875 	mutex_exit(&ksyms_lock);
876 
877 	/*
878 	 * No more references are possible.  Free the name map and the
879 	 * symtab itself, which we had allocated in ksyms_modload.
880 	 */
881 	kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
882 	kmem_free(st, sizeof(*st));
883 
884 	/* Release the formerly global ksyms snapshot, if any.  */
885 	if (ks)
886 		ksyms_snapshot_release(ks);
887 }
888 
889 #ifdef DDB
890 /*
891  * Keep sifting stuff here, to avoid export of ksyms internals.
892  *
893  * Systems is expected to be quiescent, so no locking done.
894  */
895 int
ksyms_sift(char * mod,char * sym,int mode)896 ksyms_sift(char *mod, char *sym, int mode)
897 {
898 	struct ksyms_symtab *st;
899 	char *sb;
900 	int i, sz;
901 
902 	if (!ksyms_loaded)
903 		return ENOENT;
904 
905 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
906 		if (mod && strcmp(mod, st->sd_name))
907 			continue;
908 		sb = st->sd_strstart - st->sd_usroffset;
909 
910 		sz = st->sd_symsize/sizeof(Elf_Sym);
911 		for (i = 0; i < sz; i++) {
912 			Elf_Sym *les = st->sd_symstart + i;
913 			char c;
914 
915 			if (strstr(sb + les->st_name, sym) == NULL)
916 				continue;
917 
918 			if (mode == 'F') {
919 				switch (ELF_ST_TYPE(les->st_info)) {
920 				case STT_OBJECT:
921 					c = '+';
922 					break;
923 				case STT_FUNC:
924 					c = '*';
925 					break;
926 				case STT_SECTION:
927 					c = '&';
928 					break;
929 				case STT_FILE:
930 					c = '/';
931 					break;
932 				default:
933 					c = ' ';
934 					break;
935 				}
936 				db_printf("%s%c ", sb + les->st_name, c);
937 			} else
938 				db_printf("%s ", sb + les->st_name);
939 		}
940 	}
941 	return ENOENT;
942 }
943 #endif /* DDB */
944 
945 /*
946  * In case we exposing the symbol table to the userland using the pseudo-
947  * device /dev/ksyms, it is easier to provide all the tables as one.
948  * However, it means we have to change all the st_name fields for the
949  * symbols so they match the ELF image that the userland will read
950  * through the device.
951  *
952  * The actual (correct) value of st_name is preserved through a global
953  * offset stored in the symbol table structure.
954  *
955  * Call with ksyms_lock held.
956  */
957 static void
ksyms_sizes_calc(void)958 ksyms_sizes_calc(void)
959 {
960 	struct ksyms_symtab *st;
961 	int i, delta;
962 
963 	KASSERT(cold || mutex_owned(&ksyms_lock));
964 
965 	ksyms_symsz = ksyms_strsz = 0;
966 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
967 		delta = ksyms_strsz - st->sd_usroffset;
968 		if (delta != 0) {
969 			for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++)
970 				st->sd_symstart[i].st_name += delta;
971 			st->sd_usroffset = ksyms_strsz;
972 		}
973 		ksyms_symsz += st->sd_symsize;
974 		ksyms_strsz += st->sd_strsize;
975 	}
976 }
977 
978 static void
ksyms_fill_note(void)979 ksyms_fill_note(void)
980 {
981 	int32_t *note = ksyms_hdr.kh_note;
982 	note[0] = ELF_NOTE_NETBSD_NAMESZ;
983 	note[1] = ELF_NOTE_NETBSD_DESCSZ;
984 	note[2] = ELF_NOTE_TYPE_NETBSD_TAG;
985 	memcpy(&note[3],  "NetBSD\0", 8);
986 	note[5] = __NetBSD_Version__;
987 }
988 
989 static void
ksyms_hdr_init(const void * hdraddr)990 ksyms_hdr_init(const void *hdraddr)
991 {
992 	/* Copy the loaded elf exec header */
993 	memcpy(&ksyms_hdr.kh_ehdr, hdraddr, sizeof(Elf_Ehdr));
994 
995 	/* Set correct program/section header sizes, offsets and numbers */
996 	ksyms_hdr.kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_phdr[0]);
997 	ksyms_hdr.kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
998 	ksyms_hdr.kh_ehdr.e_phnum = NPRGHDR;
999 	ksyms_hdr.kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr[0]);
1000 	ksyms_hdr.kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
1001 	ksyms_hdr.kh_ehdr.e_shnum = NSECHDR;
1002 	ksyms_hdr.kh_ehdr.e_shstrndx = SHSTRTAB;
1003 
1004 	/* Text/data - fake */
1005 	ksyms_hdr.kh_phdr[0].p_type = PT_LOAD;
1006 	ksyms_hdr.kh_phdr[0].p_memsz = (unsigned long)-1L;
1007 	ksyms_hdr.kh_phdr[0].p_flags = PF_R | PF_X | PF_W;
1008 
1009 #define SHTCOPY(name)  strlcpy(&ksyms_hdr.kh_strtab[offs], (name), \
1010     sizeof(ksyms_hdr.kh_strtab) - offs), offs += sizeof(name)
1011 
1012 	uint32_t offs = 1;
1013 	/* First section header ".note.netbsd.ident" */
1014 	ksyms_hdr.kh_shdr[SHNOTE].sh_name = offs;
1015 	ksyms_hdr.kh_shdr[SHNOTE].sh_type = SHT_NOTE;
1016 	ksyms_hdr.kh_shdr[SHNOTE].sh_offset =
1017 	    offsetof(struct ksyms_hdr, kh_note[0]);
1018 	ksyms_hdr.kh_shdr[SHNOTE].sh_size = sizeof(ksyms_hdr.kh_note);
1019 	ksyms_hdr.kh_shdr[SHNOTE].sh_addralign = sizeof(int);
1020 	SHTCOPY(".note.netbsd.ident");
1021 	ksyms_fill_note();
1022 
1023 	/* Second section header; ".symtab" */
1024 	ksyms_hdr.kh_shdr[SYMTAB].sh_name = offs;
1025 	ksyms_hdr.kh_shdr[SYMTAB].sh_type = SHT_SYMTAB;
1026 	ksyms_hdr.kh_shdr[SYMTAB].sh_offset = sizeof(struct ksyms_hdr);
1027 /*	ksyms_hdr.kh_shdr[SYMTAB].sh_size = filled in at open */
1028 	ksyms_hdr.kh_shdr[SYMTAB].sh_link = STRTAB; /* Corresponding strtab */
1029 	ksyms_hdr.kh_shdr[SYMTAB].sh_addralign = sizeof(long);
1030 	ksyms_hdr.kh_shdr[SYMTAB].sh_entsize = sizeof(Elf_Sym);
1031 	SHTCOPY(".symtab");
1032 
1033 	/* Third section header; ".strtab" */
1034 	ksyms_hdr.kh_shdr[STRTAB].sh_name = offs;
1035 	ksyms_hdr.kh_shdr[STRTAB].sh_type = SHT_STRTAB;
1036 /*	ksyms_hdr.kh_shdr[STRTAB].sh_offset = filled in at open */
1037 /*	ksyms_hdr.kh_shdr[STRTAB].sh_size = filled in at open */
1038 	ksyms_hdr.kh_shdr[STRTAB].sh_addralign = sizeof(char);
1039 	SHTCOPY(".strtab");
1040 
1041 	/* Fourth section, ".shstrtab" */
1042 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_name = offs;
1043 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_type = SHT_STRTAB;
1044 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_offset =
1045 	    offsetof(struct ksyms_hdr, kh_strtab);
1046 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_size = SHSTRSIZ;
1047 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_addralign = sizeof(char);
1048 	SHTCOPY(".shstrtab");
1049 
1050 	/* Fifth section, ".bss". All symbols reside here. */
1051 	ksyms_hdr.kh_shdr[SHBSS].sh_name = offs;
1052 	ksyms_hdr.kh_shdr[SHBSS].sh_type = SHT_NOBITS;
1053 	ksyms_hdr.kh_shdr[SHBSS].sh_offset = 0;
1054 	ksyms_hdr.kh_shdr[SHBSS].sh_size = (unsigned long)-1L;
1055 	ksyms_hdr.kh_shdr[SHBSS].sh_addralign = PAGE_SIZE;
1056 	ksyms_hdr.kh_shdr[SHBSS].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1057 	SHTCOPY(".bss");
1058 
1059 	/* Sixth section header; ".SUNW_ctf" */
1060 	ksyms_hdr.kh_shdr[SHCTF].sh_name = offs;
1061 	ksyms_hdr.kh_shdr[SHCTF].sh_type = SHT_PROGBITS;
1062 /*	ksyms_hdr.kh_shdr[SHCTF].sh_offset = filled in at open */
1063 /*	ksyms_hdr.kh_shdr[SHCTF].sh_size = filled in at open */
1064 	ksyms_hdr.kh_shdr[SHCTF].sh_link = SYMTAB; /* Corresponding symtab */
1065 	ksyms_hdr.kh_shdr[SHCTF].sh_addralign = sizeof(char);
1066 	SHTCOPY(".SUNW_ctf");
1067 }
1068 
1069 static struct ksyms_snapshot *
ksyms_snapshot_alloc(int maxlen,size_t size,dev_t dev,uint64_t gen)1070 ksyms_snapshot_alloc(int maxlen, size_t size, dev_t dev, uint64_t gen)
1071 {
1072 	struct ksyms_snapshot *ks;
1073 
1074 	ks = kmem_zalloc(sizeof(*ks), KM_SLEEP);
1075 	ks->ks_refcnt = 1;
1076 	ks->ks_gen = gen;
1077 	ks->ks_uobj = uao_create(size, 0);
1078 	ks->ks_size = size;
1079 	ks->ks_dev = dev;
1080 	ks->ks_maxlen = maxlen;
1081 
1082 	return ks;
1083 }
1084 
1085 static void
ksyms_snapshot_release(struct ksyms_snapshot * ks)1086 ksyms_snapshot_release(struct ksyms_snapshot *ks)
1087 {
1088 	uint64_t refcnt;
1089 
1090 	mutex_enter(&ksyms_lock);
1091 	refcnt = --ks->ks_refcnt;
1092 	mutex_exit(&ksyms_lock);
1093 
1094 	if (refcnt)
1095 		return;
1096 
1097 	uao_detach(ks->ks_uobj);
1098 	kmem_free(ks, sizeof(*ks));
1099 }
1100 
1101 static int
ubc_copyfrombuf(struct uvm_object * uobj,struct uio * uio,const void * buf,size_t n)1102 ubc_copyfrombuf(struct uvm_object *uobj, struct uio *uio, const void *buf,
1103     size_t n)
1104 {
1105 	struct iovec iov = { .iov_base = __UNCONST(buf), .iov_len = n };
1106 
1107 	uio->uio_iov = &iov;
1108 	uio->uio_iovcnt = 1;
1109 	uio->uio_resid = n;
1110 
1111 	return ubc_uiomove(uobj, uio, n, UVM_ADV_SEQUENTIAL, UBC_WRITE);
1112 }
1113 
1114 static int
ksyms_take_snapshot(struct ksyms_snapshot * ks,struct ksyms_symtab * last)1115 ksyms_take_snapshot(struct ksyms_snapshot *ks, struct ksyms_symtab *last)
1116 {
1117 	struct uvm_object *uobj = ks->ks_uobj;
1118 	struct uio uio;
1119 	struct ksyms_symtab *st;
1120 	int error;
1121 
1122 	/* Caller must have initiated snapshotting.  */
1123 	KASSERT(ksyms_snapshotting == curlwp);
1124 
1125 	/* Start a uio transfer to reuse incrementally.  */
1126 	uio.uio_offset = 0;
1127 	uio.uio_rw = UIO_WRITE; /* write from buffer to uobj */
1128 	UIO_SETUP_SYSSPACE(&uio);
1129 
1130 	/*
1131 	 * First: Copy out the ELF header.
1132 	 */
1133 	error = ubc_copyfrombuf(uobj, &uio, &ksyms_hdr, sizeof(ksyms_hdr));
1134 	if (error)
1135 		return error;
1136 
1137 	/*
1138 	 * Copy out the symbol table.  The list of symtabs is
1139 	 * guaranteed to be nonempty because we always have an entry
1140 	 * for the main kernel.  We stop at last, not at the end of the
1141 	 * tailq or NULL, because entries beyond last are not included
1142 	 * in this snapshot (and may not be fully initialized memory as
1143 	 * we witness it).
1144 	 */
1145 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr));
1146 	for (st = TAILQ_FIRST(&ksyms_symtabs);
1147 	     ;
1148 	     st = TAILQ_NEXT(st, sd_queue)) {
1149 		error = ubc_copyfrombuf(uobj, &uio, st->sd_symstart,
1150 		    st->sd_symsize);
1151 		if (error)
1152 			return error;
1153 		if (st == last)
1154 			break;
1155 	}
1156 
1157 	/*
1158 	 * Copy out the string table
1159 	 */
1160 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
1161 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size);
1162 	for (st = TAILQ_FIRST(&ksyms_symtabs);
1163 	     ;
1164 	     st = TAILQ_NEXT(st, sd_queue)) {
1165 		error = ubc_copyfrombuf(uobj, &uio, st->sd_strstart,
1166 		    st->sd_strsize);
1167 		if (error)
1168 			return error;
1169 		if (st == last)
1170 			break;
1171 	}
1172 
1173 	/*
1174 	 * Copy out the CTF table.
1175 	 */
1176 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
1177 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
1178 	    ksyms_hdr.kh_shdr[STRTAB].sh_size);
1179 	st = TAILQ_FIRST(&ksyms_symtabs);
1180 	if (st->sd_ctfstart != NULL) {
1181 		error = ubc_copyfrombuf(uobj, &uio, st->sd_ctfstart,
1182 		    st->sd_ctfsize);
1183 		if (error)
1184 			return error;
1185 	}
1186 
1187 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
1188 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
1189 	    ksyms_hdr.kh_shdr[STRTAB].sh_size +
1190 	    ksyms_hdr.kh_shdr[SHCTF].sh_size);
1191 	KASSERT(uio.uio_offset == ks->ks_size);
1192 
1193 	return 0;
1194 }
1195 
1196 static const struct fileops ksyms_fileops;
1197 
1198 static int
ksymsopen(dev_t dev,int flags,int devtype,struct lwp * l)1199 ksymsopen(dev_t dev, int flags, int devtype, struct lwp *l)
1200 {
1201 	struct file *fp = NULL;
1202 	int fd = -1;
1203 	struct ksyms_snapshot *ks = NULL;
1204 	size_t size;
1205 	struct ksyms_symtab *last;
1206 	int maxlen;
1207 	uint64_t gen;
1208 	int error;
1209 
1210 	if (minor(dev) != 0 || !ksyms_loaded)
1211 		return ENXIO;
1212 
1213 	/* Allocate a private file.  */
1214 	error = fd_allocfile(&fp, &fd);
1215 	if (error)
1216 		return error;
1217 
1218 	mutex_enter(&ksyms_lock);
1219 
1220 	/*
1221 	 * Wait until we have a snapshot, or until there is no snapshot
1222 	 * being taken right now so we can take one.
1223 	 */
1224 	while ((ks = ksyms_snapshot) == NULL && ksyms_snapshotting) {
1225 		error = cv_wait_sig(&ksyms_cv, &ksyms_lock);
1226 		if (error)
1227 			goto out;
1228 	}
1229 
1230 	/*
1231 	 * If there's a usable snapshot, increment its reference count
1232 	 * (can't overflow, 64-bit) and just reuse it.
1233 	 */
1234 	if (ks) {
1235 		ks->ks_refcnt++;
1236 		goto out;
1237 	}
1238 
1239 	/* Find the current length of the symtab object. */
1240 	size = sizeof(struct ksyms_hdr);
1241 	size += ksyms_strsz;
1242 	size += ksyms_symsz;
1243 	size += ksyms_ctfsz;
1244 
1245 	/* Start a new snapshot.  */
1246 	ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz;
1247 	ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym);
1248 	ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz +
1249 	    ksyms_hdr.kh_shdr[SYMTAB].sh_offset;
1250 	ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz;
1251 	ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz +
1252 	    ksyms_hdr.kh_shdr[STRTAB].sh_offset;
1253 	ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz;
1254 	last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
1255 	maxlen = ksyms_maxlen;
1256 	gen = ksyms_snapshot_gen++;
1257 
1258 	/*
1259 	 * Prevent ksyms entries from being removed while we take the
1260 	 * snapshot.
1261 	 */
1262 	KASSERT(ksyms_snapshotting == NULL);
1263 	ksyms_snapshotting = curlwp;
1264 	mutex_exit(&ksyms_lock);
1265 
1266 	/* Create a snapshot and write the symtab to it.  */
1267 	ks = ksyms_snapshot_alloc(maxlen, size, dev, gen);
1268 	error = ksyms_take_snapshot(ks, last);
1269 
1270 	/*
1271 	 * Snapshot creation is done.  Wake up anyone waiting to remove
1272 	 * entries (module unload).
1273 	 */
1274 	mutex_enter(&ksyms_lock);
1275 	KASSERTMSG(ksyms_snapshotting == curlwp, "lwp %p stole snapshot",
1276 	    ksyms_snapshotting);
1277 	ksyms_snapshotting = NULL;
1278 	cv_broadcast(&ksyms_cv);
1279 
1280 	/* If we failed, give up.  */
1281 	if (error)
1282 		goto out;
1283 
1284 	/* Cache the snapshot for the next reader.  */
1285 	KASSERT(ksyms_snapshot == NULL);
1286 	ksyms_snapshot = ks;
1287 	ks->ks_refcnt++;
1288 	KASSERT(ks->ks_refcnt == 2);
1289 
1290 out:	mutex_exit(&ksyms_lock);
1291 	if (error) {
1292 		if (fp)
1293 			fd_abort(curproc, fp, fd);
1294 		if (ks)
1295 			ksyms_snapshot_release(ks);
1296 	} else {
1297 		KASSERT(fp);
1298 		KASSERT(ks);
1299 		error = fd_clone(fp, fd, flags, &ksyms_fileops, ks);
1300 		KASSERTMSG(error == EMOVEFD, "error=%d", error);
1301 	}
1302 	return error;
1303 }
1304 
1305 static int
ksymsclose(struct file * fp)1306 ksymsclose(struct file *fp)
1307 {
1308 	struct ksyms_snapshot *ks = fp->f_data;
1309 
1310 	ksyms_snapshot_release(ks);
1311 
1312 	return 0;
1313 }
1314 
1315 static int
ksymsread(struct file * fp,off_t * offp,struct uio * uio,kauth_cred_t cred,int flags)1316 ksymsread(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
1317     int flags)
1318 {
1319 	const struct ksyms_snapshot *ks = fp->f_data;
1320 	size_t count;
1321 	int error;
1322 
1323 	/*
1324 	 * Since we don't have a per-object lock, we might as well use
1325 	 * the struct file lock to serialize access to fp->f_offset --
1326 	 * but if the caller isn't relying on or updating fp->f_offset,
1327 	 * there's no need to do even that.  We could use ksyms_lock,
1328 	 * but why bother with a global lock if not needed?  Either
1329 	 * way, the lock we use here must agree with what ksymsseek
1330 	 * takes (nothing else in ksyms uses fp->f_offset).
1331 	 */
1332 	if (offp == &fp->f_offset)
1333 		mutex_enter(&fp->f_lock);
1334 
1335 	/* Refuse negative offsets.  */
1336 	if (*offp < 0) {
1337 		error = EINVAL;
1338 		goto out;
1339 	}
1340 
1341 	/* Return nothing at or past end of file.  */
1342 	if (*offp >= ks->ks_size) {
1343 		error = 0;
1344 		goto out;
1345 	}
1346 
1347 	/*
1348 	 * 1. Set up the uio to transfer from offset *offp.
1349 	 * 2. Transfer as many bytes as we can (at most uio->uio_resid
1350 	 *    or what's left in the ksyms).
1351 	 * 3. If requested, update *offp to reflect the number of bytes
1352 	 *    transferred.
1353 	 */
1354 	uio->uio_offset = *offp;
1355 	count = uio->uio_resid;
1356 	error = ubc_uiomove(ks->ks_uobj, uio, MIN(count, ks->ks_size - *offp),
1357 	    UVM_ADV_SEQUENTIAL, UBC_READ|UBC_PARTIALOK);
1358 	if (flags & FOF_UPDATE_OFFSET)
1359 		*offp += count - uio->uio_resid;
1360 
1361 out:	if (offp == &fp->f_offset)
1362 		mutex_exit(&fp->f_lock);
1363 	return error;
1364 }
1365 
1366 static int
ksymsstat(struct file * fp,struct stat * st)1367 ksymsstat(struct file *fp, struct stat *st)
1368 {
1369 	const struct ksyms_snapshot *ks = fp->f_data;
1370 
1371 	memset(st, 0, sizeof(*st));
1372 
1373 	st->st_dev = NODEV;
1374 	st->st_ino = 0;
1375 	st->st_mode = S_IFCHR;
1376 	st->st_nlink = 1;
1377 	st->st_uid = kauth_cred_geteuid(fp->f_cred);
1378 	st->st_gid = kauth_cred_getegid(fp->f_cred);
1379 	st->st_rdev = ks->ks_dev;
1380 	st->st_size = ks->ks_size;
1381 	/* zero time */
1382 	st->st_blksize = MAXPHYS; /* XXX arbitrary */
1383 	st->st_blocks = 0;
1384 	st->st_gen = ks->ks_gen;
1385 
1386 	return 0;
1387 }
1388 
1389 static int
ksymsmmap(struct file * fp,off_t * offp,size_t nbytes,int prot,int * flagsp,int * advicep,struct uvm_object ** uobjp,int * maxprotp)1390 ksymsmmap(struct file *fp, off_t *offp, size_t nbytes, int prot, int *flagsp,
1391     int *advicep, struct uvm_object **uobjp, int *maxprotp)
1392 {
1393 	const struct ksyms_snapshot *ks = fp->f_data;
1394 
1395 	/* uvm_mmap guarantees page-aligned offset and size.  */
1396 	KASSERT(*offp == round_page(*offp));
1397 	KASSERT(nbytes == round_page(nbytes));
1398 	KASSERT(nbytes > 0);
1399 
1400 	/* Refuse negative offsets.  */
1401 	if (*offp < 0)
1402 		return EINVAL;
1403 
1404 	/* Refuse mappings that pass the end of file.  */
1405 	if (nbytes > round_page(ks->ks_size) ||
1406 	    *offp > round_page(ks->ks_size) - nbytes)
1407 		return EINVAL;	/* XXX ??? */
1408 
1409 	/* Success!  */
1410 	uao_reference(ks->ks_uobj);
1411 	*advicep = UVM_ADV_SEQUENTIAL;
1412 	*uobjp = ks->ks_uobj;
1413 	*maxprotp = prot & VM_PROT_READ;
1414 	return 0;
1415 }
1416 
1417 static int
ksymsseek(struct file * fp,off_t delta,int whence,off_t * newoffp,int flags)1418 ksymsseek(struct file *fp, off_t delta, int whence, off_t *newoffp, int flags)
1419 {
1420 	const off_t OFF_MAX = __type_max(off_t);
1421 	struct ksyms_snapshot *ks = fp->f_data;
1422 	off_t base, newoff;
1423 	int error;
1424 
1425 	mutex_enter(&fp->f_lock);
1426 
1427 	switch (whence) {
1428 	case SEEK_CUR:
1429 		base = fp->f_offset;
1430 		break;
1431 	case SEEK_END:
1432 		base = ks->ks_size;
1433 		break;
1434 	case SEEK_SET:
1435 		base = 0;
1436 		break;
1437 	default:
1438 		error = EINVAL;
1439 		goto out;
1440 	}
1441 
1442 	/* Check for arithmetic overflow and reject negative offsets.  */
1443 	if (base < 0 || delta > OFF_MAX - base || base + delta < 0) {
1444 		error = EINVAL;
1445 		goto out;
1446 	}
1447 
1448 	/* Compute the new offset.  */
1449 	newoff = base + delta;
1450 
1451 	/* Success!  */
1452 	if (newoffp)
1453 		*newoffp = newoff;
1454 	if (flags & FOF_UPDATE_OFFSET)
1455 		fp->f_offset = newoff;
1456 	error = 0;
1457 
1458 out:	mutex_exit(&fp->f_lock);
1459 	return error;
1460 }
1461 
1462 __CTASSERT(offsetof(struct ksyms_ogsymbol, kg_name) == offsetof(struct ksyms_gsymbol, kg_name));
1463 __CTASSERT(offsetof(struct ksyms_gvalue, kv_name) == offsetof(struct ksyms_gsymbol, kg_name));
1464 
1465 static int
ksymsioctl(struct file * fp,u_long cmd,void * data)1466 ksymsioctl(struct file *fp, u_long cmd, void *data)
1467 {
1468 	struct ksyms_snapshot *ks = fp->f_data;
1469 	struct ksyms_ogsymbol *okg = (struct ksyms_ogsymbol *)data;
1470 	struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data;
1471 	struct ksyms_gvalue *kv = (struct ksyms_gvalue *)data;
1472 	struct ksyms_symtab *st;
1473 	Elf_Sym *sym = NULL, copy;
1474 	unsigned long val;
1475 	int error = 0;
1476 	char *str = NULL;
1477 	int len, s;
1478 
1479 	/* Read cached ksyms_maxlen.  */
1480 	len = ks->ks_maxlen;
1481 
1482 	if (cmd == OKIOCGVALUE || cmd == OKIOCGSYMBOL ||
1483 	    cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) {
1484 		str = kmem_alloc(len, KM_SLEEP);
1485 		if ((error = copyinstr(kg->kg_name, str, len, NULL)) != 0) {
1486 			kmem_free(str, len);
1487 			return error;
1488 		}
1489 	}
1490 
1491 	switch (cmd) {
1492 	case OKIOCGVALUE:
1493 		/*
1494 		 * Use the in-kernel symbol lookup code for fast
1495 		 * retreival of a value.
1496 		 */
1497 		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
1498 		if (error == 0)
1499 			error = copyout(&val, okg->kg_value, sizeof(long));
1500 		kmem_free(str, len);
1501 		break;
1502 
1503 	case OKIOCGSYMBOL:
1504 		/*
1505 		 * Use the in-kernel symbol lookup code for fast
1506 		 * retreival of a symbol.
1507 		 */
1508 		s = pserialize_read_enter();
1509 		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
1510 		    struct ksyms_symtab, sd_pslist) {
1511 			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
1512 				continue;
1513 #ifdef notdef
1514 			/* Skip if bad binding */
1515 			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
1516 				sym = NULL;
1517 				continue;
1518 			}
1519 #endif
1520 			break;
1521 		}
1522 		if (sym != NULL) {
1523 			memcpy(&copy, sym, sizeof(copy));
1524 			pserialize_read_exit(s);
1525 			error = copyout(&copy, okg->kg_sym, sizeof(Elf_Sym));
1526 		} else {
1527 			pserialize_read_exit(s);
1528 			error = ENOENT;
1529 		}
1530 		kmem_free(str, len);
1531 		break;
1532 
1533 	case KIOCGVALUE:
1534 		/*
1535 		 * Use the in-kernel symbol lookup code for fast
1536 		 * retreival of a value.
1537 		 */
1538 		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
1539 		if (error == 0)
1540 			kv->kv_value = val;
1541 		kmem_free(str, len);
1542 		break;
1543 
1544 	case KIOCGSYMBOL:
1545 		/*
1546 		 * Use the in-kernel symbol lookup code for fast
1547 		 * retreival of a symbol.
1548 		 */
1549 		s = pserialize_read_enter();
1550 		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
1551 		    struct ksyms_symtab, sd_pslist) {
1552 			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
1553 				continue;
1554 #ifdef notdef
1555 			/* Skip if bad binding */
1556 			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
1557 				sym = NULL;
1558 				continue;
1559 			}
1560 #endif
1561 			break;
1562 		}
1563 		if (sym != NULL) {
1564 			kg->kg_sym = *sym;
1565 		} else {
1566 			error = ENOENT;
1567 		}
1568 		pserialize_read_exit(s);
1569 		kmem_free(str, len);
1570 		break;
1571 
1572 	case KIOCGSIZE:
1573 		/*
1574 		 * Get total size of symbol table.
1575 		 */
1576 		*(int *)data = ks->ks_size;
1577 		break;
1578 
1579 	default:
1580 		error = ENOTTY;
1581 		break;
1582 	}
1583 
1584 	return error;
1585 }
1586 
1587 const struct cdevsw ksyms_cdevsw = {
1588 	.d_open = ksymsopen,
1589 	.d_close = noclose,
1590 	.d_read = noread,
1591 	.d_write = nowrite,
1592 	.d_ioctl = noioctl,
1593 	.d_stop = nostop,
1594 	.d_tty = notty,
1595 	.d_poll = nopoll,
1596 	.d_mmap = nommap,
1597 	.d_kqfilter = nokqfilter,
1598 	.d_discard = nodiscard,
1599 	.d_flag = D_OTHER | D_MPSAFE
1600 };
1601 
1602 static const struct fileops ksyms_fileops = {
1603 	.fo_name = "ksyms",
1604 	.fo_read = ksymsread,
1605 	.fo_write = fbadop_write,
1606 	.fo_ioctl = ksymsioctl,
1607 	.fo_fcntl = fnullop_fcntl,
1608 	.fo_poll = fnullop_poll,
1609 	.fo_stat = ksymsstat,
1610 	.fo_close = ksymsclose,
1611 	.fo_kqfilter = fnullop_kqfilter,
1612 	.fo_restart = fnullop_restart,
1613 	.fo_mmap = ksymsmmap,
1614 	.fo_seek = ksymsseek,
1615 };
1616