xref: /openbsd/usr.bin/ctfconv/ctfconv.c (revision 3bef86f7)
1 /*	$OpenBSD: ctfconv.c,v 1.20 2022/10/02 11:56:43 mpi Exp $ */
2 
3 /*
4  * Copyright (c) 2016-2017 Martin Pieuchot
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <sys/mman.h>
22 #include <sys/queue.h>
23 #include <sys/tree.h>
24 #include <sys/ctf.h>
25 
26 #include <assert.h>
27 #include <elf.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <limits.h>
31 #include <locale.h>
32 #include <stdio.h>
33 #include <stdint.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 
38 #include "itype.h"
39 #include "xmalloc.h"
40 
41 #ifndef nitems
42 #define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
43 #endif
44 
45 #define DEBUG_ABBREV	".debug_abbrev"
46 #define DEBUG_INFO	".debug_info"
47 #define DEBUG_STR	".debug_str"
48 
49 __dead void	 usage(void);
50 int		 convert(const char *);
51 int		 generate(const char *, const char *, int);
52 int		 elf_convert(char *, size_t);
53 void		 elf_sort(void);
54 char		*guess_static_local_name(char *);
55 struct itype	*find_symb(struct itype *, size_t);
56 void		 dump_type(struct itype *);
57 void		 dump_func(struct itype *, int *);
58 void		 dump_obj(struct itype *, int *);
59 
60 /* elf.c */
61 int		 iself(const char *, size_t);
62 int		 elf_getshstab(const char *, size_t, const char **, size_t *);
63 ssize_t		 elf_getsymtab(const char *, size_t, const char *, size_t,
64 		     const Elf_Sym **, size_t *, const char **, size_t *);
65 ssize_t		 elf_getsection(char *, size_t, const char *, const char *,
66 		     size_t, const char **, size_t *);
67 
68 /* parse.c */
69 void		 dwarf_parse(const char *, size_t, const char *, size_t);
70 
71 const char	*ctf_enc2name(unsigned short);
72 
/* lists of parsed types and functions */
/* Every parsed type; walked through the it_next linkage when dumping. */
struct itype_queue itypeq = TAILQ_HEAD_INITIALIZER(itypeq);
/* Function symbols, appended by elf_sort() in symbol table order. */
struct itype_queue ifuncq = TAILQ_HEAD_INITIALIZER(ifuncq);
/* Object symbols, appended by elf_sort() in symbol table order. */
struct itype_queue iobjq = TAILQ_HEAD_INITIALIZER(iobjq);
77 
78 __dead void
79 usage(void)
80 {
81 	fprintf(stderr, "usage: %s [-d] -l label -o outfile file\n",
82 	    getprogname());
83 	exit(1);
84 }
85 
86 int
87 main(int argc, char *argv[])
88 {
89 	const char *filename, *label = NULL, *outfile = NULL;
90 	int dump = 0;
91 	int ch, error = 0;
92 	struct itype *it;
93 
94 	setlocale(LC_ALL, "");
95 
96 	while ((ch = getopt(argc, argv, "dl:o:")) != -1) {
97 		switch (ch) {
98 		case 'd':
99 			dump = 1;	/* ctfdump(1)-like SUNW_ctf sections */
100 			break;
101 		case 'l':
102 			if (label != NULL)
103 				usage();
104 			label = optarg;
105 			break;
106 		case 'o':
107 			if (outfile != NULL)
108 				usage();
109 			outfile = optarg;
110 			break;
111 		default:
112 			usage();
113 		}
114 	}
115 
116 	argc -= optind;
117 	argv += optind;
118 
119 	if (argc != 1)
120 		usage();
121 
122 	/* Either dump the sections, or write it out. */
123 	if ((dump && (outfile != NULL || label != NULL)) ||
124 	    (!dump && (outfile == NULL || label == NULL)))
125 		usage();
126 
127 	filename = *argv;
128 
129 	if (unveil(filename, "r") == -1)
130 		err(1, "unveil %s", filename);
131 
132 	if (outfile != NULL) {
133 		if (unveil(outfile, "wc") == -1)
134 			err(1, "unveil %s", outfile);
135 	}
136 
137 	if (pledge("stdio rpath wpath cpath", NULL) == -1)
138 		err(1, "pledge");
139 
140 	error = convert(filename);
141 	if (error != 0)
142 		return error;
143 
144 	if (outfile != NULL) {
145 		if (pledge("stdio wpath cpath", NULL) == -1)
146 			err(1, "pledge");
147 
148 		error = generate(outfile, label, 1);
149 		if (error != 0)
150 			return error;
151 	}
152 
153 	if (dump) {
154 		if (pledge("stdio", NULL) == -1)
155 			err(1, "pledge");
156 
157 		int fidx = -1, oidx = -1;
158 
159 		TAILQ_FOREACH(it, &iobjq, it_symb)
160 			dump_obj(it, &oidx);
161 		printf("\n");
162 
163 		TAILQ_FOREACH(it, &ifuncq, it_symb)
164 			dump_func(it, &fidx);
165 		printf("\n");
166 
167 		TAILQ_FOREACH(it, &itypeq, it_next) {
168 			if (it->it_flags & (ITF_FUNC|ITF_OBJ))
169 				continue;
170 
171 			dump_type(it);
172 		}
173 
174 		return 0;
175 	}
176 
177 	return 0;
178 }
179 
/*
 * Map the file at `path' into memory and, if it is an ELF file, convert
 * its debug information with elf_convert().
 *
 * Returns 0 on success and 1 if the file cannot be opened, inspected or
 * is not recognized by iself(); otherwise the value of elf_convert() is
 * returned.  A failing mmap(2) terminates the program.
 */
int
convert(const char *path)
{
	struct stat		 sb;
	char			*map;
	int			 fd, rv = 1;

	if ((fd = open(path, O_RDONLY)) == -1) {
		warn("open %s", path);
		return 1;
	}

	if (fstat(fd, &sb) == -1) {
		warn("fstat %s", path);
		goto done;
	}

	/* The whole file has to be addressable through a single mapping. */
	if ((uintmax_t)sb.st_size > SIZE_MAX) {
		warnx("file too big to fit memory");
		goto done;
	}

	/* Private, writable mapping: changes never reach the file. */
	map = mmap(NULL, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		err(1, "mmap");

	if (iself(map, sb.st_size))
		rv = elf_convert(map, sb.st_size);

	munmap(map, sb.st_size);
done:
	close(fd);

	return rv;
}
215 
216 const char		*dstrbuf;
217 size_t			 dstrlen;
218 const char		*strtab;
219 const Elf_Sym		*symtab;
220 size_t			 strtabsz, nsymb;
221 
222 int
223 elf_convert(char *p, size_t filesize)
224 {
225 	const char		*shstab;
226 	const char		*infobuf, *abbuf;
227 	size_t			 infolen, ablen;
228 	size_t			 shstabsz;
229 
230 	/* Find section header string table location and size. */
231 	if (elf_getshstab(p, filesize, &shstab, &shstabsz))
232 		return 1;
233 
234 	/* Find symbol table and associated string table. */
235 	if (elf_getsymtab(p, filesize, shstab, shstabsz, &symtab, &nsymb,
236 	    &strtab, &strtabsz) == -1)
237 		warnx("symbol table not found");
238 
239 	/* Find abbreviation location and size. */
240 	if (elf_getsection(p, filesize, DEBUG_ABBREV, shstab, shstabsz, &abbuf,
241 	    &ablen) == -1) {
242 		warnx("%s section not found", DEBUG_ABBREV);
243 		return 1;
244 	}
245 
246 	if (elf_getsection(p, filesize, DEBUG_INFO, shstab, shstabsz, &infobuf,
247 	    &infolen) == -1) {
248 		warnx("%s section not found", DEBUG_INFO);
249 		return 1;
250 	}
251 
252 	/* Find string table location and size. */
253 	if (elf_getsection(p, filesize, DEBUG_STR, shstab, shstabsz, &dstrbuf,
254 	    &dstrlen) == -1)
255 		warnx("%s section not found", DEBUG_STR);
256 
257 	dwarf_parse(infobuf, infolen, abbuf, ablen);
258 
259 	/* Sort functions */
260 	elf_sort();
261 
262 	return 0;
263 }
264 
/*
 * Guess which part of a local symbol name corresponds to the variable
 * name.
 *
 * gcc 4.2.1 emits:
 *
 *	varname.id
 *
 * clang 8 emits:
 *
 *	funcname.varname
 *
 * `sname' is modified in place: the first '.' following the first
 * component is overwritten with a NUL byte.  The returned pointer, if
 * any, points inside `sname' and is only valid as long as `sname' is.
 *
 * Returns NULL if the name is empty or designates a compiler generated
 * meta symbol that should be skipped.
 */
char *
guess_static_local_name(char *sname)
{
	const char *errstr;
	char *first, *second, *last;

	/*
	 * strtok_r(3) is used instead of strtok(3) so that no hidden
	 * static state is shared with other users of the tokenizer.
	 */
	first = strtok_r(sname, ".", &last);
	if (first == NULL)
		return NULL;

	/* Skip meta symbols - gcc style. */
	if (strncmp(first, "__func__", sizeof("__func__") - 1) == 0 ||
	    strncmp(first, "__FUNCTION__", sizeof("__FUNCTION__") - 1) == 0 ||
	    strncmp(first, "__warned", sizeof("__warned") - 1) == 0)
		return NULL;

	/* An empty delimiter set yields the whole remainder of the name. */
	second = strtok_r(NULL, "", &last);
	if (second == NULL)
		return first;

	/* Skip meta symbols - clang style. */
	if (strncmp(second, "__warned", sizeof("__warned") - 1) == 0)
		return NULL;

	/*
	 * If `second' isn't a number, assume clang-style name.  The valid
	 * range starts at 1, so a return value of 0 always means failure.
	 */
	if (strtonum(second, 1, INT_MAX, &errstr) == 0)
		return second;

	return first;
}
308 
/*
 * Look up the type matching the symbol whose name lives at offset
 * `stroff' of the symbol string table.
 *
 * `tmp' is used as the search key: its name is set to the variable name
 * guessed by guess_static_local_name() before searching the symbol tree.
 * If nothing is found, the unmodified symbol name is stored in `tmp'
 * instead.
 *
 * Returns the matching type, or NULL if there is no string table, the
 * offset is out of bounds, the symbol must be skipped or no type matches.
 */
struct itype *
find_symb(struct itype *tmp, size_t stroff)
{
	struct itype		*found;
	const char		*symname;
	char			*copy, *guess;

	if (strtab == NULL || stroff >= strtabsz)
		return NULL;

	symname = strtab + stroff;

	/* Work on a copy: guessing the name modifies the string. */
	copy = xstrdup(symname);
	guess = guess_static_local_name(copy);
	if (guess == NULL) {
		free(copy);
		return NULL;
	}

	strlcpy(tmp->it_name, guess, ITNAME_MAX);
	free(copy);

	found = RB_FIND(isymb_tree, &isymbt, tmp);
	if (found != NULL)
		return found;

	/* Restore original name */
	strlcpy(tmp->it_name, symname, ITNAME_MAX);

	return NULL;
}
334 
335 void
336 elf_sort(void)
337 {
338 	struct itype		*it, tmp;
339 	size_t			 i;
340 
341 	memset(&tmp, 0, sizeof(tmp));
342 	for (i = 0; i < nsymb; i++) {
343 		const Elf_Sym	*st = &symtab[i];
344 
345 		if (st->st_shndx == SHN_UNDEF || st->st_shndx == SHN_COMMON)
346 			continue;
347 
348 		switch (ELF_ST_TYPE(st->st_info)) {
349 		case STT_FUNC:
350 			tmp.it_flags = ITF_FUNC;
351 			break;
352 		case STT_OBJECT:
353 			tmp.it_flags = ITF_OBJ;
354 			break;
355 		default:
356 			continue;
357 		}
358 
359 		it = find_symb(&tmp, st->st_name);
360 		if (it == NULL) {
361 			/* Insert 'unknown' entry to match symbol order. */
362 			it = it_dup(&tmp);
363 			it->it_refp = it;
364 #ifdef DEBUG
365 			warnx("symbol not found: %s", it_name(it));
366 #endif
367 		}
368 
369 		if (it->it_flags & ITF_INSERTED) {
370 #ifdef DEBUG
371 			warnx("%s: already inserted", it_name(it));
372 #endif
373 			it = it_dup(it);
374 		}
375 
376 		/* Save symbol index for dump. */
377 		it->it_ref = i;
378 
379 		it->it_flags |= ITF_INSERTED;
380 		if (it->it_flags & ITF_FUNC)
381 			TAILQ_INSERT_TAIL(&ifuncq, it, it_symb);
382 		else
383 			TAILQ_INSERT_TAIL(&iobjq, it, it_symb);
384 	}
385 }
386 
/*
 * Return the name of `it' as given by it_name(), or the placeholder
 * "(anon)" when there is none.  Never returns NULL.
 */
const char *
type_name(struct itype *it)
{
	const char *name = it_name(it);

	return (name != NULL) ? name : "(anon)";
}
398 
399 /* Display parsed types a la ctfdump(1) */
400 void
401 dump_type(struct itype *it)
402 {
403 	struct imember *im;
404 
405 #ifdef DEBUG
406 	switch (it->it_type) {
407 	case CTF_K_POINTER:
408 	case CTF_K_TYPEDEF:
409 	case CTF_K_VOLATILE:
410 	case CTF_K_CONST:
411 	case CTF_K_RESTRICT:
412 	case CTF_K_ARRAY:
413 	case CTF_K_FUNCTION:
414 		if (it->it_refp == NULL) {
415 			printf("unresolved: %s type=%d\n", it_name(it),
416 			    it->it_type);
417 			return;
418 		}
419 	default:
420 		break;
421 	}
422 #endif
423 
424 	switch (it->it_type) {
425 	case CTF_K_FLOAT:
426 	case CTF_K_INTEGER:
427 		printf("  [%u] %s %s encoding=%s offset=0 bits=%u\n",
428 		    it->it_idx,
429 		    (it->it_type == CTF_K_INTEGER) ? "INTEGER" : "FLOAT",
430 		    it_name(it), ctf_enc2name(it->it_enc), it->it_size);
431 		break;
432 	case CTF_K_POINTER:
433 		printf("  <%u> POINTER %s refers to %u\n", it->it_idx,
434 		    type_name(it), it->it_refp->it_idx);
435 		break;
436 	case CTF_K_TYPEDEF:
437 		printf("  <%u> TYPEDEF %s refers to %u\n",
438 		    it->it_idx, it_name(it), it->it_refp->it_idx);
439 		break;
440 	case CTF_K_VOLATILE:
441 		printf("  <%u> VOLATILE %s refers to %u\n", it->it_idx,
442 		    type_name(it), it->it_refp->it_idx);
443 		break;
444 	case CTF_K_CONST:
445 		printf("  <%u> CONST %s refers to %u\n", it->it_idx,
446 		    type_name(it), it->it_refp->it_idx);
447 		break;
448 	case CTF_K_RESTRICT:
449 		printf("  <%u> RESTRICT %s refers to %u\n", it->it_idx,
450 		    it_name(it), it->it_refp->it_idx);
451 		break;
452 	case CTF_K_ARRAY:
453 		printf("  [%u] ARRAY %s content: %u index: %u nelems: %u\n",
454 		    it->it_idx, type_name(it), it->it_refp->it_idx, long_tidx,
455 		    it->it_nelems);
456 		printf("\n");
457 		break;
458 	case CTF_K_STRUCT:
459 	case CTF_K_UNION:
460 		printf("  [%u] %s %s (%u bytes)\n", it->it_idx,
461 		    (it->it_type == CTF_K_STRUCT) ? "STRUCT" : "UNION",
462 		    type_name(it), it->it_size);
463 		TAILQ_FOREACH(im, &it->it_members, im_next) {
464 			printf("\t%s type=%u off=%zu\n",
465 			    (im_name(im) == NULL) ? "unknown" : im_name(im),
466 			    im->im_refp ? im->im_refp->it_idx : 0, im->im_off);
467 		}
468 		printf("\n");
469 		break;
470 	case CTF_K_ENUM:
471 		printf("  [%u] ENUM %s\n", it->it_idx, type_name(it));
472 		TAILQ_FOREACH(im, &it->it_members, im_next) {
473 			printf("\t%s = %zu\n", im_name(im), im->im_ref);
474 		}
475 		printf("\n");
476 		break;
477 	case CTF_K_FUNCTION:
478 		printf("  [%u] FUNCTION (%s) returns: %u args: (",
479 		    it->it_idx, (it_name(it) != NULL) ? it_name(it) : "anon",
480 		    it->it_refp->it_idx);
481 		TAILQ_FOREACH(im, &it->it_members, im_next) {
482 			printf("%u%s", im->im_refp->it_idx,
483 			    TAILQ_NEXT(im, im_next) ? ", " : "");
484 		}
485 		printf(")\n");
486 		break;
487 	default:
488 		assert(0 == 1);
489 	}
490 }
491 
492 void
493 dump_func(struct itype *it, int *idx)
494 {
495 	struct imember *im;
496 
497 	(*idx)++;
498 
499 	if (it->it_type == CTF_K_UNKNOWN && it->it_nelems == 0)
500 		return;
501 
502 	printf("  [%u] FUNC (%s) returns: %u args: (", (*idx),
503 	    (it_name(it) != NULL) ? it_name(it) : "unknown",
504 	    it->it_refp->it_idx);
505 	TAILQ_FOREACH(im, &it->it_members, im_next) {
506 		printf("%u%s", im->im_refp->it_idx,
507 		    TAILQ_NEXT(im, im_next) ? ", " : "");
508 	}
509 	printf(")\n");
510 }
511 
512 void
513 dump_obj(struct itype *it, int *idx)
514 {
515 	int l;
516 
517 	(*idx)++;
518 
519 	l = printf("  [%u] %u", (*idx), it->it_refp->it_idx);
520 	printf("%*s %s (%llu)\n", 14 - l, "", it_name(it), it->it_ref);
521 }
522 
/*
 * Return a human readable name for the integer encoding `enc'.
 *
 * CTF_INT_VARARGS maps to "VARARGS" and the values 1 to 5 map to the
 * corresponding entry of `enc_name'.  Any other value is formatted in
 * hexadecimal into a static buffer, so that result is overwritten by
 * the next call returning an unknown encoding.
 */
const char *
ctf_enc2name(unsigned short enc)
{
	/* enc_name[i] describes the encoding value i + 1. */
	static const char *enc_name[] = { "SIGNED", "CHAR", "SIGNED CHAR",
	    "BOOL", "SIGNED BOOL" };
	/* Large enough for "0xffff" and the terminating NUL. */
	static char invalid[7];

	if (enc == CTF_INT_VARARGS)
		return "VARARGS";

	/*
	 * The table is indexed with `enc - 1', thus the upper bound is
	 * inclusive: the last entry corresponds to nitems(enc_name).
	 */
	if (enc > 0 && enc <= nitems(enc_name))
		return enc_name[enc - 1];

	snprintf(invalid, sizeof(invalid), "0x%x", enc);
	return invalid;
}
539