xref: /openbsd/usr.bin/ctfconv/ctfconv.c (revision 4cfece93)
1 /*	$OpenBSD: ctfconv.c,v 1.18 2019/11/07 13:39:08 mpi Exp $ */
2 
3 /*
4  * Copyright (c) 2016-2017 Martin Pieuchot
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <sys/mman.h>
22 #include <sys/queue.h>
23 #include <sys/tree.h>
24 #include <sys/ctf.h>
25 
26 #include <assert.h>
27 #include <elf.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <limits.h>
31 #include <locale.h>
32 #include <stdio.h>
33 #include <stdint.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 
38 #include "itype.h"
39 #include "xmalloc.h"
40 
/* Number of elements of a true array (not valid on pointers). */
#ifndef nitems
#define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
#endif

/* Names of the ELF sections holding the DWARF data. */
#define DEBUG_ABBREV	".debug_abbrev"
#define DEBUG_INFO	".debug_info"
#define DEBUG_LINE	".debug_line"
#define DEBUG_STR	".debug_str"

/* Functions defined in this file. */
__dead void	 usage(void);
int		 convert(const char *);
int		 generate(const char *, const char *, int);
int		 elf_convert(char *, size_t);
void		 elf_sort(void);
char		*guess_static_local_name(char *);
struct itype	*find_symb(struct itype *, size_t);
void		 dump_type(struct itype *);
void		 dump_func(struct itype *, int *);
void		 dump_obj(struct itype *, int *);

/* elf.c */
int		 iself(const char *, size_t);
int		 elf_getshstab(const char *, size_t, const char **, size_t *);
ssize_t		 elf_getsymtab(const char *, size_t, const char *, size_t,
		     const Elf_Sym **, size_t *, const char **, size_t *);
ssize_t		 elf_getsection(char *, size_t, const char *, const char *,
		     size_t, const char **, size_t *);

/* parse.c */
void		 dwarf_parse(const char *, size_t, const char *, size_t);

const char	*ctf_enc2name(unsigned short);

/*
 * Lists of parsed types, functions and objects.
 *
 * `itypeq' is walked through the `it_next' linkage, while `ifuncq' and
 * `iobjq' are filled by elf_sort() and walked through `it_symb'.
 */
struct itype_queue itypeq = TAILQ_HEAD_INITIALIZER(itypeq);
struct itype_queue ifuncq = TAILQ_HEAD_INITIALIZER(ifuncq);
struct itype_queue iobjq = TAILQ_HEAD_INITIALIZER(iobjq);
78 
79 __dead void
80 usage(void)
81 {
82 	fprintf(stderr, "usage: %s [-d] -l label -o outfile file\n",
83 	    getprogname());
84 	exit(1);
85 }
86 
87 int
88 main(int argc, char *argv[])
89 {
90 	const char *filename, *label = NULL, *outfile = NULL;
91 	int dump = 0;
92 	int ch, error = 0;
93 	struct itype *it;
94 
95 	setlocale(LC_ALL, "");
96 
97 	while ((ch = getopt(argc, argv, "dl:o:")) != -1) {
98 		switch (ch) {
99 		case 'd':
100 			dump = 1;	/* ctfdump(1)-like SUNW_ctf sections */
101 			break;
102 		case 'l':
103 			if (label != NULL)
104 				usage();
105 			label = optarg;
106 			break;
107 		case 'o':
108 			if (outfile != NULL)
109 				usage();
110 			outfile = optarg;
111 			break;
112 		default:
113 			usage();
114 		}
115 	}
116 
117 	argc -= optind;
118 	argv += optind;
119 
120 	if (argc != 1)
121 		usage();
122 
123 	/* Either dump the sections, or write it out. */
124 	if ((dump && (outfile != NULL || label != NULL)) ||
125 	    (!dump && (outfile == NULL || label == NULL)))
126 		usage();
127 
128 	filename = *argv;
129 
130 	if (unveil(filename, "r") == -1)
131 		err(1, "unveil");
132 
133 	if (outfile != NULL) {
134 		if (unveil(outfile, "wc") == -1)
135 			err(1, "unveil");
136 	}
137 
138 	if (pledge("stdio rpath wpath cpath", NULL) == -1)
139 		err(1, "pledge");
140 
141 	error = convert(filename);
142 	if (error != 0)
143 		return error;
144 
145 	if (outfile != NULL) {
146 		if (pledge("stdio wpath cpath", NULL) == -1)
147 			err(1, "pledge");
148 
149 		error = generate(outfile, label, 1);
150 		if (error != 0)
151 			return error;
152 	}
153 
154 	if (dump) {
155 		if (pledge("stdio", NULL) == -1)
156 			err(1, "pledge");
157 
158 		int fidx = -1, oidx = -1;
159 
160 		TAILQ_FOREACH(it, &iobjq, it_symb)
161 			dump_obj(it, &oidx);
162 		printf("\n");
163 
164 		TAILQ_FOREACH(it, &ifuncq, it_symb)
165 			dump_func(it, &fidx);
166 		printf("\n");
167 
168 		TAILQ_FOREACH(it, &itypeq, it_next) {
169 			if (it->it_flags & (ITF_FUNC|ITF_OBJ))
170 				continue;
171 
172 			dump_type(it);
173 		}
174 
175 		return 0;
176 	}
177 
178 	return 0;
179 }
180 
/*
 * Map the file `path' in memory and, if it is an ELF file, convert the
 * debug information it contains.
 *
 * Returns 0 on success and 1 if the file cannot be opened or inspected,
 * is too big, is not recognized by iself() or if elf_convert() failed.
 * A mmap(2) failure is fatal.
 */
int
convert(const char *path)
{
	struct stat		 sb;
	char			*map;
	int			 fd, rv = 1;

	if ((fd = open(path, O_RDONLY)) == -1) {
		warn("open %s", path);
		return 1;
	}

	if (fstat(fd, &sb) == -1) {
		warn("fstat %s", path);
		goto done;
	}

	if ((uintmax_t)sb.st_size > SIZE_MAX) {
		warnx("file too big to fit memory");
		goto done;
	}

	/* Private, writable mapping: changes never reach the file. */
	map = mmap(NULL, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		err(1, "mmap");

	if (iself(map, sb.st_size))
		rv = elf_convert(map, sb.st_size);

	munmap(map, sb.st_size);
done:
	close(fd);

	return rv;
}
216 
/*
 * Locations found by elf_convert().
 *
 * `dstrbuf'/`dstrlen' describe the DEBUG_STR section.  `symtab'/`nsymb'
 * and `strtab'/`strtabsz' describe the symbol table and its associated
 * string table; they are read by elf_sort() and find_symb().
 */
const char		*dstrbuf;
size_t			 dstrlen;
const char		*strtab;
const Elf_Sym		*symtab;
size_t			 strtabsz, nsymb;
222 
223 int
224 elf_convert(char *p, size_t filesize)
225 {
226 	const char		*shstab;
227 	const char		*infobuf, *abbuf;
228 	size_t			 infolen, ablen;
229 	size_t			 shstabsz;
230 
231 	/* Find section header string table location and size. */
232 	if (elf_getshstab(p, filesize, &shstab, &shstabsz))
233 		return 1;
234 
235 	/* Find symbol table and associated string table. */
236 	if (elf_getsymtab(p, filesize, shstab, shstabsz, &symtab, &nsymb,
237 	    &strtab, &strtabsz) == -1)
238 		warnx("symbol table not found");
239 
240 	/* Find abbreviation location and size. */
241 	if (elf_getsection(p, filesize, DEBUG_ABBREV, shstab, shstabsz, &abbuf,
242 	    &ablen) == -1) {
243 		warnx("%s section not found", DEBUG_ABBREV);
244 		return 1;
245 	}
246 
247 	if (elf_getsection(p, filesize, DEBUG_INFO, shstab, shstabsz, &infobuf,
248 	    &infolen) == -1) {
249 		warnx("%s section not found", DEBUG_INFO);
250 		return 1;
251 	}
252 
253 	/* Find string table location and size. */
254 	if (elf_getsection(p, filesize, DEBUG_STR, shstab, shstabsz, &dstrbuf,
255 	    &dstrlen) == -1)
256 		warnx("%s section not found", DEBUG_STR);
257 
258 	dwarf_parse(infobuf, infolen, abbuf, ablen);
259 
260 	/* Sort functions */
261 	elf_sort();
262 
263 	return 0;
264 }
265 
/*
 * Guess which part of a local symbol name corresponds to the variable
 * name.
 *
 * gcc 4.2.1 emits:
 *
 *	varname.id
 *
 * clang 8 emits:
 *
 *	funcname.varname
 *
 * The string `sname' is modified in place: the '.' ending the first
 * component, if any, is overwritten with a NUL.  The returned pointer
 * points inside `sname'.  NULL is returned if `sname' is NULL, contains
 * no component at all, or names a compiler generated meta symbol.
 *
 * The name is split by hand, with the same results strtok(3) would give,
 * so that this function neither depends on nor clobbers strtok's hidden
 * static state.
 */
char *
guess_static_local_name(char *sname)
{
	const char *errstr;
	char *first, *second;

	if (sname == NULL)
		return NULL;

	/* Leading separators are not part of the first component. */
	first = sname + strspn(sname, ".");
	if (*first == '\0')
		return NULL;

	/*
	 * Terminate the first component.  Everything after the separator,
	 * further dots included, makes up the second one.
	 */
	second = first + strcspn(first, ".");
	if (*second == '.')
		*second++ = '\0';

	/* Skip meta symbols - gcc style. */
	if (strncmp(first, "__func__", sizeof("__func__") - 1) == 0 ||
	    strncmp(first, "__FUNCTION__", sizeof("__FUNCTION__") - 1) == 0 ||
	    strncmp(first, "__warned", sizeof("__warned") - 1) == 0)
		return NULL;

	/* A single component: that is the variable name. */
	if (*second == '\0')
		return first;

	/* Skip meta symbols - clang style. */
	if (strncmp(second, "__warned", sizeof("__warned") - 1) == 0)
		return NULL;

	/* If `second' isn't a number, assume clang-style name. */
	(void)strtonum(second, 1, INT_MAX, &errstr);
	if (errstr != NULL)
		return second;

	return first;
}
309 
310 struct itype *
311 find_symb(struct itype *tmp, size_t stroff)
312 {
313 	struct itype		*it;
314 	char 			*sname, *p;
315 
316 	if (strtab == NULL || stroff >= strtabsz)
317 		return NULL;
318 
319 	sname = xstrdup(strtab + stroff);
320 	if ((p = guess_static_local_name(sname)) == NULL) {
321 		free(sname);
322 		return NULL;
323 	}
324 
325 	strlcpy(tmp->it_name, p, ITNAME_MAX);
326 	free(sname);
327 	it = RB_FIND(isymb_tree, &isymbt, tmp);
328 
329 	/* Restore original name */
330 	if (it == NULL)
331 		strlcpy(tmp->it_name, (strtab + stroff), ITNAME_MAX);
332 
333 	return it;
334 }
335 
336 void
337 elf_sort(void)
338 {
339 	struct itype		*it, tmp;
340 	size_t			 i;
341 
342 	memset(&tmp, 0, sizeof(tmp));
343 	for (i = 0; i < nsymb; i++) {
344 		const Elf_Sym	*st = &symtab[i];
345 
346 		if (st->st_shndx == SHN_UNDEF || st->st_shndx == SHN_COMMON)
347 			continue;
348 
349 		switch (ELF_ST_TYPE(st->st_info)) {
350 		case STT_FUNC:
351 			tmp.it_flags = ITF_FUNC;
352 			break;
353 		case STT_OBJECT:
354 			tmp.it_flags = ITF_OBJ;
355 			break;
356 		default:
357 			continue;
358 		}
359 
360 		it = find_symb(&tmp, st->st_name);
361 		if (it == NULL) {
362 			/* Insert 'unknown' entry to match symbol order. */
363 			it = it_dup(&tmp);
364 			it->it_refp = it;
365 #ifdef DEBUG
366 			warnx("symbol not found: %s", it_name(it));
367 #endif
368 		}
369 
370 		if (it->it_flags & ITF_INSERTED) {
371 #ifdef DEBUG
372 			warnx("%s: already inserted", it_name(it));
373 #endif
374 			it = it_dup(it);
375 		}
376 
377 		/* Save symbol index for dump. */
378 		it->it_ref = i;
379 
380 		it->it_flags |= ITF_INSERTED;
381 		if (it->it_flags & ITF_FUNC)
382 			TAILQ_INSERT_TAIL(&ifuncq, it, it_symb);
383 		else
384 			TAILQ_INSERT_TAIL(&iobjq, it, it_symb);
385 	}
386 }
387 
/*
 * Return the name of the type `it', or "(anon)" if it_name() reports
 * none.
 */
const char *
type_name(struct itype *it)
{
	const char *n = it_name(it);

	return (n != NULL) ? n : "(anon)";
}
399 
400 /* Display parsed types a la ctfdump(1) */
401 void
402 dump_type(struct itype *it)
403 {
404 	struct imember *im;
405 
406 #ifdef DEBUG
407 	switch (it->it_type) {
408 	case CTF_K_POINTER:
409 	case CTF_K_TYPEDEF:
410 	case CTF_K_VOLATILE:
411 	case CTF_K_CONST:
412 	case CTF_K_RESTRICT:
413 	case CTF_K_ARRAY:
414 	case CTF_K_FUNCTION:
415 		if (it->it_refp == NULL) {
416 			printf("unresolved: %s type=%d\n", it_name(it),
417 			    it->it_type);
418 			return;
419 		}
420 	default:
421 		break;
422 	}
423 #endif
424 
425 	switch (it->it_type) {
426 	case CTF_K_FLOAT:
427 	case CTF_K_INTEGER:
428 		printf("  [%u] %s %s encoding=%s offset=0 bits=%u\n",
429 		    it->it_idx,
430 		    (it->it_type == CTF_K_INTEGER) ? "INTEGER" : "FLOAT",
431 		    it_name(it), ctf_enc2name(it->it_enc), it->it_size);
432 		break;
433 	case CTF_K_POINTER:
434 		printf("  <%u> POINTER %s refers to %u\n", it->it_idx,
435 		    type_name(it), it->it_refp->it_idx);
436 		break;
437 	case CTF_K_TYPEDEF:
438 		printf("  <%u> TYPEDEF %s refers to %u\n",
439 		    it->it_idx, it_name(it), it->it_refp->it_idx);
440 		break;
441 	case CTF_K_VOLATILE:
442 		printf("  <%u> VOLATILE %s refers to %u\n", it->it_idx,
443 		    type_name(it), it->it_refp->it_idx);
444 		break;
445 	case CTF_K_CONST:
446 		printf("  <%u> CONST %s refers to %u\n", it->it_idx,
447 		    type_name(it), it->it_refp->it_idx);
448 		break;
449 	case CTF_K_RESTRICT:
450 		printf("  <%u> RESTRICT %s refers to %u\n", it->it_idx,
451 		    it_name(it), it->it_refp->it_idx);
452 		break;
453 	case CTF_K_ARRAY:
454 		printf("  [%u] ARRAY %s content: %u index: %u nelems: %u\n",
455 		    it->it_idx, type_name(it), it->it_refp->it_idx, long_tidx,
456 		    it->it_nelems);
457 		printf("\n");
458 		break;
459 	case CTF_K_STRUCT:
460 	case CTF_K_UNION:
461 		printf("  [%u] %s %s (%u bytes)\n", it->it_idx,
462 		    (it->it_type == CTF_K_STRUCT) ? "STRUCT" : "UNION",
463 		    type_name(it), it->it_size);
464 		TAILQ_FOREACH(im, &it->it_members, im_next) {
465 			printf("\t%s type=%u off=%zu\n",
466 			    (im_name(im) == NULL) ? "unknown" : im_name(im),
467 			    im->im_refp ? im->im_refp->it_idx : 0, im->im_off);
468 		}
469 		printf("\n");
470 		break;
471 	case CTF_K_ENUM:
472 		printf("  [%u] ENUM %s\n", it->it_idx, type_name(it));
473 		TAILQ_FOREACH(im, &it->it_members, im_next) {
474 			printf("\t%s = %zu\n", im_name(im), im->im_ref);
475 		}
476 		printf("\n");
477 		break;
478 	case CTF_K_FUNCTION:
479 		printf("  [%u] FUNCTION (%s) returns: %u args: (",
480 		    it->it_idx, (it_name(it) != NULL) ? it_name(it) : "anon",
481 		    it->it_refp->it_idx);
482 		TAILQ_FOREACH(im, &it->it_members, im_next) {
483 			printf("%u%s", im->im_refp->it_idx,
484 			    TAILQ_NEXT(im, im_next) ? ", " : "");
485 		}
486 		printf(")\n");
487 		break;
488 	default:
489 		assert(0 == 1);
490 	}
491 }
492 
493 void
494 dump_func(struct itype *it, int *idx)
495 {
496 	struct imember *im;
497 
498 	(*idx)++;
499 
500 	if (it->it_type == CTF_K_UNKNOWN && it->it_nelems == 0)
501 		return;
502 
503 	printf("  [%u] FUNC (%s) returns: %u args: (", (*idx),
504 	    (it_name(it) != NULL) ? it_name(it) : "unknown",
505 	    it->it_refp->it_idx);
506 	TAILQ_FOREACH(im, &it->it_members, im_next) {
507 		printf("%u%s", im->im_refp->it_idx,
508 		    TAILQ_NEXT(im, im_next) ? ", " : "");
509 	}
510 	printf(")\n");
511 }
512 
513 void
514 dump_obj(struct itype *it, int *idx)
515 {
516 	int l;
517 
518 	(*idx)++;
519 
520 	l = printf("  [%u] %u", (*idx), it->it_refp->it_idx);
521 	printf("%*s %s (%llu)\n", 14 - l, "", it_name(it), it->it_ref);
522 }
523 
/*
 * Return a printable name for the encoding `enc'.
 *
 * CTF_INT_VARARGS is reported as "VARARGS" and the values 1 up to the
 * number of entries of the table below by their name.  Any other value
 * is formatted in hexadecimal in a static buffer which is overwritten by
 * the next such call.
 */
const char *
ctf_enc2name(unsigned short enc)
{
	static const char *enc_name[] = { "SIGNED", "CHAR", "SIGNED CHAR",
	    "BOOL", "SIGNED BOOL" };
	static char invalid[7];	/* "0x" + 4 hex digits + NUL */

	if (enc == CTF_INT_VARARGS)
		return "VARARGS";

	/* The table is indexed by `enc - 1', so its size is a valid `enc'. */
	if (enc > 0 && enc <= nitems(enc_name))
		return enc_name[enc - 1];

	snprintf(invalid, sizeof(invalid), "0x%x", enc);
	return invalid;
}
540