xref: /linux/tools/bpf/resolve_btfids/main.c (revision f614f2c7)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 
3 /*
4  * resolve_btfids scans Elf object for .BTF_ids section and resolves
5  * its symbols with BTF ID values.
6  *
7  * Each symbol points to 4 bytes data and is expected to have
8  * following name syntax:
9  *
10  * __BTF_ID__<type>__<symbol>[__<id>]
11  *
12  * type is:
13  *
14  *   func    - lookup BTF_KIND_FUNC symbol with <symbol> name
15  *             and store its ID into the data:
16  *
17  *             __BTF_ID__func__vfs_close__1:
18  *             .zero 4
19  *
20  *   struct  - lookup BTF_KIND_STRUCT symbol with <symbol> name
21  *             and store its ID into the data:
22  *
23  *             __BTF_ID__struct__sk_buff__1:
24  *             .zero 4
25  *
26  *   union   - lookup BTF_KIND_UNION symbol with <symbol> name
27  *             and store its ID into the data:
28  *
29  *             __BTF_ID__union__thread_union__1:
30  *             .zero 4
31  *
32  *   typedef - lookup BTF_KIND_TYPEDEF symbol with <symbol> name
33  *             and store its ID into the data:
34  *
35  *             __BTF_ID__typedef__pid_t__1:
36  *             .zero 4
37  *
38  *   set     - store symbol size into first 4 bytes and sort following
39  *             ID list
40  *
41  *             __BTF_ID__set__list:
42  *             .zero 4
43  *             list:
44  *             __BTF_ID__func__vfs_getattr__3:
45  *             .zero 4
46  *             __BTF_ID__func__vfs_fallocate__4:
47  *             .zero 4
48  */
49 
50 #define  _GNU_SOURCE
51 #include <stdio.h>
52 #include <string.h>
53 #include <unistd.h>
54 #include <stdlib.h>
55 #include <libelf.h>
56 #include <gelf.h>
57 #include <sys/stat.h>
58 #include <fcntl.h>
59 #include <errno.h>
60 #include <linux/rbtree.h>
61 #include <linux/zalloc.h>
62 #include <linux/err.h>
63 #include <btf.h>
64 #include <libbpf.h>
65 #include <parse-options.h>
66 
/* Name of the ELF section that holds the BTF ID placeholders. */
#define BTF_IDS_SECTION	".BTF_ids"
/* Prefix shared by every symbol this tool processes. */
#define BTF_ID		"__BTF_ID__"

/* <type> component that follows the BTF_ID prefix in a symbol name. */
#define BTF_STRUCT	"struct"
#define BTF_UNION	"union"
#define BTF_TYPEDEF	"typedef"
#define BTF_FUNC	"func"
#define BTF_SET		"set"

/* Capacity of the per-symbol address array in struct btf_id. */
#define ADDR_CNT	100
77 
/*
 * One collected symbol name, kept in a per-kind rbtree keyed by 'name'.
 */
struct btf_id {
	/* Linkage into one of the rb_root trees of struct object. */
	struct rb_node	 rb_node;
	/* Lookup key; compared with strcmp() when searching/inserting. */
	char		*name;
	union {
		/* Resolved BTF type ID (0 while unresolved). */
		int	 id;
		/* For sets: number of IDs in the list, derived from symbol size. */
		int	 cnt;
	};
	/* Number of valid entries in addr[]. */
	int		 addr_cnt;
	/* Symbol values (st_value) of every symbol sharing this name. */
	Elf64_Addr	 addr[ADDR_CNT];
};
88 
/*
 * State of a single resolve_btfids run over one ELF object.
 */
struct object {
	/* Path of the ELF object to patch (positional argument). */
	const char *path;
	/* Optional BTF source given with --btf; falls back to 'path'. */
	const char *btf;
	/* Optional base BTF file given with -b/--btf_base. */
	const char *base_btf_path;

	/* ELF handles and section data filled in by elf_collect(). */
	struct {
		int		 fd;
		Elf		*elf;
		/* Data of the SHT_SYMTAB section. */
		Elf_Data	*symbols;
		/* Data of the .BTF_ids section. */
		Elf_Data	*idlist;
		/* Section indexes; main() initializes both to -1 (not found). */
		int		 symbols_shndx;
		int		 idlist_shndx;
		/* sh_link of the symbol table, used for symbol name lookup. */
		size_t		 strtabidx;
		/* sh_addr of the .BTF_ids section. */
		unsigned long	 idlist_addr;
	} efile;

	/* Collected symbols, one tree per <type>. */
	struct rb_root	sets;
	struct rb_root	structs;
	struct rb_root	unions;
	struct rb_root	typedefs;
	struct rb_root	funcs;

	/* Number of symbols seen per kind, incremented in symbols_collect(). */
	int nr_funcs;
	int nr_structs;
	int nr_unions;
	int nr_typedefs;
};
116 
/* Verbosity level, incremented by each -v option; gates the pr_* macros. */
static int verbose;
118 
/*
 * Print a printf-style message to stderr when the current verbosity
 * 'var' reaches the message's 'level'.
 *
 * Returns the vfprintf() result, or 0 when the message is suppressed.
 */
static int eprintf(int level, int var, const char *fmt, ...)
{
	va_list ap;
	int written;

	/* Message is above the current verbosity: stay silent. */
	if (var < level)
		return 0;

	va_start(ap, fmt);
	written = vfprintf(stderr, fmt, ap);
	va_end(ap);

	return written;
}
131 
#ifndef pr_fmt
#define pr_fmt(fmt) fmt
#endif

/*
 * Logging helpers built on eprintf(): the first argument is the minimum
 * 'verbose' level at which the message is printed.  pr_err()/pr_info()
 * use level 0 and are therefore always printed.
 */
#define pr_debug(fmt, ...) \
	eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_debugN(n, fmt, ...) \
	eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
/* pr_debugN() already applies pr_fmt(); pass the raw format through. */
#define pr_debug2(fmt, ...) pr_debugN(2, fmt, ##__VA_ARGS__)
#define pr_err(fmt, ...) \
	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info(fmt, ...) \
	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
145 
146 static bool is_btf_id(const char *name)
147 {
148 	return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1);
149 }
150 
151 static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
152 {
153 	struct rb_node *p = root->rb_node;
154 	struct btf_id *id;
155 	int cmp;
156 
157 	while (p) {
158 		id = rb_entry(p, struct btf_id, rb_node);
159 		cmp = strcmp(id->name, name);
160 		if (cmp < 0)
161 			p = p->rb_left;
162 		else if (cmp > 0)
163 			p = p->rb_right;
164 		else
165 			return id;
166 	}
167 	return NULL;
168 }
169 
170 static struct btf_id*
171 btf_id__add(struct rb_root *root, char *name, bool unique)
172 {
173 	struct rb_node **p = &root->rb_node;
174 	struct rb_node *parent = NULL;
175 	struct btf_id *id;
176 	int cmp;
177 
178 	while (*p != NULL) {
179 		parent = *p;
180 		id = rb_entry(parent, struct btf_id, rb_node);
181 		cmp = strcmp(id->name, name);
182 		if (cmp < 0)
183 			p = &(*p)->rb_left;
184 		else if (cmp > 0)
185 			p = &(*p)->rb_right;
186 		else
187 			return unique ? NULL : id;
188 	}
189 
190 	id = zalloc(sizeof(*id));
191 	if (id) {
192 		pr_debug("adding symbol %s\n", name);
193 		id->name = name;
194 		rb_link_node(&id->rb_node, parent, p);
195 		rb_insert_color(&id->rb_node, root);
196 	}
197 	return id;
198 }
199 
/*
 * Extract the <symbol> part from the tail of a __BTF_ID__ symbol name.
 *
 *   __BTF_ID__func__vfs_truncate__0
 *   prefix_end =  ^
 *   name       =    ^
 *
 * 'prefix_end' points right behind the <type> part.  The two characters
 * that follow are skipped and the trailing "__<id>" suffix is cut off.
 *
 * Returns a newly allocated string the caller owns, or NULL when the
 * name is too short, carries no "__<id>" suffix, or allocation fails.
 */
static char *get_id(const char *prefix_end)
{
	const size_t sep_len = sizeof("__") - 1;
	const char *name, *last;
	size_t name_len;
	char *id;

	if (strlen(prefix_end) <= sep_len)
		return NULL;

	name = prefix_end + sep_len;

	/*
	 * The unique id is separated by "__": the last '_' must exist and
	 * must be preceded by another '_' that still belongs to 'name'.
	 */
	last = strrchr(name, '_');
	if (!last || last == name || last[-1] != '_')
		return NULL;

	/* Copy everything in front of the "__<id>" suffix. */
	name_len = (size_t)(last - 1 - name);
	id = malloc(name_len + 1);
	if (!id)
		return NULL;

	memcpy(id, name, name_len);
	id[name_len] = '\0';
	return id;
}
232 
233 static struct btf_id *add_set(struct object *obj, char *name)
234 {
235 	/*
236 	 * __BTF_ID__set__name
237 	 * name =    ^
238 	 * id   =         ^
239 	 */
240 	char *id = name + sizeof(BTF_SET "__") - 1;
241 	int len = strlen(name);
242 
243 	if (id >= name + len) {
244 		pr_err("FAILED to parse set name: %s\n", name);
245 		return NULL;
246 	}
247 
248 	return btf_id__add(&obj->sets, id, true);
249 }
250 
251 static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
252 {
253 	char *id;
254 
255 	id = get_id(name + size);
256 	if (!id) {
257 		pr_err("FAILED to parse symbol name: %s\n", name);
258 		return NULL;
259 	}
260 
261 	return btf_id__add(root, id, false);
262 }
263 
264 /* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */
265 #ifndef SHF_COMPRESSED
266 #define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */
267 #endif
268 
269 /*
270  * The data of compressed section should be aligned to 4
271  * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
272  * sets sh_addralign to 1, which makes libelf fail with
273  * misaligned section error during the update:
274  *    FAILED elf_update(WRITE): invalid section alignment
275  *
276  * While waiting for ld fix, we fix the compressed sections
277  * sh_addralign value manualy.
278  */
279 static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
280 {
281 	int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8;
282 
283 	if (!(sh->sh_flags & SHF_COMPRESSED))
284 		return 0;
285 
286 	if (sh->sh_addralign == expected)
287 		return 0;
288 
289 	pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n",
290 		  sh->sh_addralign, expected);
291 
292 	sh->sh_addralign = expected;
293 
294 	if (gelf_update_shdr(scn, sh) == 0) {
295 		pr_err("FAILED cannot update section header: %s\n",
296 			elf_errmsg(-1));
297 		return -1;
298 	}
299 	return 0;
300 }
301 
302 static int elf_collect(struct object *obj)
303 {
304 	Elf_Scn *scn = NULL;
305 	size_t shdrstrndx;
306 	int idx = 0;
307 	Elf *elf;
308 	int fd;
309 
310 	fd = open(obj->path, O_RDWR, 0666);
311 	if (fd == -1) {
312 		pr_err("FAILED cannot open %s: %s\n",
313 			obj->path, strerror(errno));
314 		return -1;
315 	}
316 
317 	elf_version(EV_CURRENT);
318 
319 	elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
320 	if (!elf) {
321 		close(fd);
322 		pr_err("FAILED cannot create ELF descriptor: %s\n",
323 			elf_errmsg(-1));
324 		return -1;
325 	}
326 
327 	obj->efile.fd  = fd;
328 	obj->efile.elf = elf;
329 
330 	elf_flagelf(elf, ELF_C_SET, ELF_F_LAYOUT);
331 
332 	if (elf_getshdrstrndx(elf, &shdrstrndx) != 0) {
333 		pr_err("FAILED cannot get shdr str ndx\n");
334 		return -1;
335 	}
336 
337 	/*
338 	 * Scan all the elf sections and look for save data
339 	 * from .BTF_ids section and symbols.
340 	 */
341 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
342 		Elf_Data *data;
343 		GElf_Shdr sh;
344 		char *name;
345 
346 		idx++;
347 		if (gelf_getshdr(scn, &sh) != &sh) {
348 			pr_err("FAILED get section(%d) header\n", idx);
349 			return -1;
350 		}
351 
352 		name = elf_strptr(elf, shdrstrndx, sh.sh_name);
353 		if (!name) {
354 			pr_err("FAILED get section(%d) name\n", idx);
355 			return -1;
356 		}
357 
358 		data = elf_getdata(scn, 0);
359 		if (!data) {
360 			pr_err("FAILED to get section(%d) data from %s\n",
361 				idx, name);
362 			return -1;
363 		}
364 
365 		pr_debug2("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
366 			  idx, name, (unsigned long) data->d_size,
367 			  (int) sh.sh_link, (unsigned long) sh.sh_flags,
368 			  (int) sh.sh_type);
369 
370 		if (sh.sh_type == SHT_SYMTAB) {
371 			obj->efile.symbols       = data;
372 			obj->efile.symbols_shndx = idx;
373 			obj->efile.strtabidx     = sh.sh_link;
374 		} else if (!strcmp(name, BTF_IDS_SECTION)) {
375 			obj->efile.idlist       = data;
376 			obj->efile.idlist_shndx = idx;
377 			obj->efile.idlist_addr  = sh.sh_addr;
378 		}
379 
380 		if (compressed_section_fix(elf, scn, &sh))
381 			return -1;
382 	}
383 
384 	return 0;
385 }
386 
387 static int symbols_collect(struct object *obj)
388 {
389 	Elf_Scn *scn = NULL;
390 	int n, i;
391 	GElf_Shdr sh;
392 	char *name;
393 
394 	scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
395 	if (!scn)
396 		return -1;
397 
398 	if (gelf_getshdr(scn, &sh) != &sh)
399 		return -1;
400 
401 	n = sh.sh_size / sh.sh_entsize;
402 
403 	/*
404 	 * Scan symbols and look for the ones starting with
405 	 * __BTF_ID__* over .BTF_ids section.
406 	 */
407 	for (i = 0; i < n; i++) {
408 		char *prefix;
409 		struct btf_id *id;
410 		GElf_Sym sym;
411 
412 		if (!gelf_getsym(obj->efile.symbols, i, &sym))
413 			return -1;
414 
415 		if (sym.st_shndx != obj->efile.idlist_shndx)
416 			continue;
417 
418 		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
419 				  sym.st_name);
420 
421 		if (!is_btf_id(name))
422 			continue;
423 
424 		/*
425 		 * __BTF_ID__TYPE__vfs_truncate__0
426 		 * prefix =  ^
427 		 */
428 		prefix = name + sizeof(BTF_ID) - 1;
429 
430 		/* struct */
431 		if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) {
432 			obj->nr_structs++;
433 			id = add_symbol(&obj->structs, prefix, sizeof(BTF_STRUCT) - 1);
434 		/* union  */
435 		} else if (!strncmp(prefix, BTF_UNION, sizeof(BTF_UNION) - 1)) {
436 			obj->nr_unions++;
437 			id = add_symbol(&obj->unions, prefix, sizeof(BTF_UNION) - 1);
438 		/* typedef */
439 		} else if (!strncmp(prefix, BTF_TYPEDEF, sizeof(BTF_TYPEDEF) - 1)) {
440 			obj->nr_typedefs++;
441 			id = add_symbol(&obj->typedefs, prefix, sizeof(BTF_TYPEDEF) - 1);
442 		/* func */
443 		} else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) {
444 			obj->nr_funcs++;
445 			id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
446 		/* set */
447 		} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
448 			id = add_set(obj, prefix);
449 			/*
450 			 * SET objects store list's count, which is encoded
451 			 * in symbol's size, together with 'cnt' field hence
452 			 * that - 1.
453 			 */
454 			if (id)
455 				id->cnt = sym.st_size / sizeof(int) - 1;
456 		} else {
457 			pr_err("FAILED unsupported prefix %s\n", prefix);
458 			return -1;
459 		}
460 
461 		if (!id)
462 			return -ENOMEM;
463 
464 		if (id->addr_cnt >= ADDR_CNT) {
465 			pr_err("FAILED symbol %s crossed the number of allowed lists\n",
466 				id->name);
467 			return -1;
468 		}
469 		id->addr[id->addr_cnt++] = sym.st_value;
470 	}
471 
472 	return 0;
473 }
474 
475 static int symbols_resolve(struct object *obj)
476 {
477 	int nr_typedefs = obj->nr_typedefs;
478 	int nr_structs  = obj->nr_structs;
479 	int nr_unions   = obj->nr_unions;
480 	int nr_funcs    = obj->nr_funcs;
481 	struct btf *base_btf = NULL;
482 	int err, type_id;
483 	struct btf *btf;
484 	__u32 nr_types;
485 
486 	if (obj->base_btf_path) {
487 		base_btf = btf__parse(obj->base_btf_path, NULL);
488 		err = libbpf_get_error(base_btf);
489 		if (err) {
490 			pr_err("FAILED: load base BTF from %s: %s\n",
491 			       obj->base_btf_path, strerror(-err));
492 			return -1;
493 		}
494 	}
495 
496 	btf = btf__parse_split(obj->btf ?: obj->path, base_btf);
497 	err = libbpf_get_error(btf);
498 	if (err) {
499 		pr_err("FAILED: load BTF from %s: %s\n",
500 			obj->btf ?: obj->path, strerror(-err));
501 		goto out;
502 	}
503 
504 	err = -1;
505 	nr_types = btf__get_nr_types(btf);
506 
507 	/*
508 	 * Iterate all the BTF types and search for collected symbol IDs.
509 	 */
510 	for (type_id = 1; type_id <= nr_types; type_id++) {
511 		const struct btf_type *type;
512 		struct rb_root *root;
513 		struct btf_id *id;
514 		const char *str;
515 		int *nr;
516 
517 		type = btf__type_by_id(btf, type_id);
518 		if (!type) {
519 			pr_err("FAILED: malformed BTF, can't resolve type for ID %d\n",
520 				type_id);
521 			goto out;
522 		}
523 
524 		if (btf_is_func(type) && nr_funcs) {
525 			nr   = &nr_funcs;
526 			root = &obj->funcs;
527 		} else if (btf_is_struct(type) && nr_structs) {
528 			nr   = &nr_structs;
529 			root = &obj->structs;
530 		} else if (btf_is_union(type) && nr_unions) {
531 			nr   = &nr_unions;
532 			root = &obj->unions;
533 		} else if (btf_is_typedef(type) && nr_typedefs) {
534 			nr   = &nr_typedefs;
535 			root = &obj->typedefs;
536 		} else
537 			continue;
538 
539 		str = btf__name_by_offset(btf, type->name_off);
540 		if (!str) {
541 			pr_err("FAILED: malformed BTF, can't resolve name for ID %d\n",
542 				type_id);
543 			goto out;
544 		}
545 
546 		id = btf_id__find(root, str);
547 		if (id) {
548 			if (id->id) {
549 				pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n",
550 					str, id->id, type_id, id->id);
551 			} else {
552 				id->id = type_id;
553 				(*nr)--;
554 			}
555 		}
556 	}
557 
558 	err = 0;
559 out:
560 	btf__free(base_btf);
561 	btf__free(btf);
562 	return err;
563 }
564 
565 static int id_patch(struct object *obj, struct btf_id *id)
566 {
567 	Elf_Data *data = obj->efile.idlist;
568 	int *ptr = data->d_buf;
569 	int i;
570 
571 	if (!id->id) {
572 		pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
573 	}
574 
575 	for (i = 0; i < id->addr_cnt; i++) {
576 		unsigned long addr = id->addr[i];
577 		unsigned long idx = addr - obj->efile.idlist_addr;
578 
579 		pr_debug("patching addr %5lu: ID %7d [%s]\n",
580 			 idx, id->id, id->name);
581 
582 		if (idx >= data->d_size) {
583 			pr_err("FAILED patching index %lu out of bounds %lu\n",
584 				idx, data->d_size);
585 			return -1;
586 		}
587 
588 		idx = idx / sizeof(int);
589 		ptr[idx] = id->id;
590 	}
591 
592 	return 0;
593 }
594 
595 static int __symbols_patch(struct object *obj, struct rb_root *root)
596 {
597 	struct rb_node *next;
598 	struct btf_id *id;
599 
600 	next = rb_first(root);
601 	while (next) {
602 		id = rb_entry(next, struct btf_id, rb_node);
603 
604 		if (id_patch(obj, id))
605 			return -1;
606 
607 		next = rb_next(next);
608 	}
609 	return 0;
610 }
611 
/*
 * qsort() comparator ordering two ints ascending.
 *
 * Compares instead of subtracting, so the result cannot overflow for
 * operands of opposite sign.
 */
static int cmp_id(const void *pa, const void *pb)
{
	const int *a = pa, *b = pb;

	return (*a > *b) - (*a < *b);
}
618 
619 static int sets_patch(struct object *obj)
620 {
621 	Elf_Data *data = obj->efile.idlist;
622 	int *ptr = data->d_buf;
623 	struct rb_node *next;
624 
625 	next = rb_first(&obj->sets);
626 	while (next) {
627 		unsigned long addr, idx;
628 		struct btf_id *id;
629 		int *base;
630 		int cnt;
631 
632 		id   = rb_entry(next, struct btf_id, rb_node);
633 		addr = id->addr[0];
634 		idx  = addr - obj->efile.idlist_addr;
635 
636 		/* sets are unique */
637 		if (id->addr_cnt != 1) {
638 			pr_err("FAILED malformed data for set '%s'\n",
639 				id->name);
640 			return -1;
641 		}
642 
643 		idx = idx / sizeof(int);
644 		base = &ptr[idx] + 1;
645 		cnt = ptr[idx];
646 
647 		pr_debug("sorting  addr %5lu: cnt %6d [%s]\n",
648 			 (idx + 1) * sizeof(int), cnt, id->name);
649 
650 		qsort(base, cnt, sizeof(int), cmp_id);
651 
652 		next = rb_next(next);
653 	}
654 	return 0;
655 }
656 
657 static int symbols_patch(struct object *obj)
658 {
659 	int err;
660 
661 	if (__symbols_patch(obj, &obj->structs)  ||
662 	    __symbols_patch(obj, &obj->unions)   ||
663 	    __symbols_patch(obj, &obj->typedefs) ||
664 	    __symbols_patch(obj, &obj->funcs)    ||
665 	    __symbols_patch(obj, &obj->sets))
666 		return -1;
667 
668 	if (sets_patch(obj))
669 		return -1;
670 
671 	/* Set type to ensure endian translation occurs. */
672 	obj->efile.idlist->d_type = ELF_T_WORD;
673 
674 	elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
675 
676 	err = elf_update(obj->efile.elf, ELF_C_WRITE);
677 	if (err < 0) {
678 		pr_err("FAILED elf_update(WRITE): %s\n",
679 			elf_errmsg(-1));
680 	}
681 
682 	pr_debug("update %s for %s\n",
683 		 err >= 0 ? "ok" : "failed", obj->path);
684 	return err < 0 ? -1 : 0;
685 }
686 
/* NULL-terminated usage lines passed to the option parser. */
static const char * const resolve_btfids_usage[] = {
	"resolve_btfids [<options>] <ELF object>",
	NULL
};
691 
692 int main(int argc, const char **argv)
693 {
694 	struct object obj = {
695 		.efile = {
696 			.idlist_shndx  = -1,
697 			.symbols_shndx = -1,
698 		},
699 		.structs  = RB_ROOT,
700 		.unions   = RB_ROOT,
701 		.typedefs = RB_ROOT,
702 		.funcs    = RB_ROOT,
703 		.sets     = RB_ROOT,
704 	};
705 	struct option btfid_options[] = {
706 		OPT_INCR('v', "verbose", &verbose,
707 			 "be more verbose (show errors, etc)"),
708 		OPT_STRING(0, "btf", &obj.btf, "BTF data",
709 			   "BTF data"),
710 		OPT_STRING('b', "btf_base", &obj.base_btf_path, "file",
711 			   "path of file providing base BTF"),
712 		OPT_END()
713 	};
714 	int err = -1;
715 
716 	argc = parse_options(argc, argv, btfid_options, resolve_btfids_usage,
717 			     PARSE_OPT_STOP_AT_NON_OPTION);
718 	if (argc != 1)
719 		usage_with_options(resolve_btfids_usage, btfid_options);
720 
721 	obj.path = argv[0];
722 
723 	if (elf_collect(&obj))
724 		goto out;
725 
726 	/*
727 	 * We did not find .BTF_ids section or symbols section,
728 	 * nothing to do..
729 	 */
730 	if (obj.efile.idlist_shndx == -1 ||
731 	    obj.efile.symbols_shndx == -1) {
732 		pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n");
733 		return 0;
734 	}
735 
736 	if (symbols_collect(&obj))
737 		goto out;
738 
739 	if (symbols_resolve(&obj))
740 		goto out;
741 
742 	if (symbols_patch(&obj))
743 		goto out;
744 
745 	err = 0;
746 out:
747 	if (obj.efile.elf) {
748 		elf_end(obj.efile.elf);
749 		close(obj.efile.fd);
750 	}
751 	return err;
752 }
753