xref: /openbsd/usr.bin/ctfconv/parse.c (revision 4bdff4be)
1 /*	$OpenBSD: parse.c,v 1.15 2022/12/26 18:43:49 jmc Exp $ */
2 
3 /*
4  * Copyright (c) 2016-2017 Martin Pieuchot
5  * Copyright (c) 2016 Jasper Lievisse Adriaanse <jasper@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*
21  * DWARF to IT (internal type) representation parser.
22  */
23 
24 #include <sys/queue.h>
25 #include <sys/tree.h>
26 #include <sys/types.h>
27 #include <sys/ctf.h>
28 
29 #include <assert.h>
30 #include <limits.h>
31 #include <err.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #include "itype.h"
36 #include "xmalloc.h"
37 #include "dwarf.h"
38 #include "dw.h"
39 #include "pool.h"
40 
41 #ifdef DEBUG
42 #include <stdio.h>
43 #endif
44 
45 #ifndef NOPOOL
46 struct pool it_pool, im_pool, ir_pool;
47 #endif /* NOPOOL */
48 
49 #ifndef nitems
50 #define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
51 #endif
52 
53 #define DPRINTF(x...)	do { /*printf(x)*/ } while (0)
54 
55 #define VOID_OFFSET	1	/* Fake offset for generating "void" type. */
56 
57 /*
58  * Tree used to resolve per-CU types based on their offset in
59  * the abbrev section.
60  */
61 RB_HEAD(ioff_tree, itype);
62 
63 /*
64  * Per-type trees used to merge existing types with the ones of
65  * a newly parsed CU.
66  */
67 RB_HEAD(itype_tree, itype)	 itypet[CTF_K_MAX];
68 
69 /*
70  * Tree of symbols used to build a list matching the order of
71  * the ELF symbol table.
72  */
73 struct isymb_tree	 isymbt;
74 
75 struct itype		*void_it;		/* no type is emited for void */
76 uint16_t		 tidx, fidx, oidx;	/* type, func & object IDs */
77 uint16_t		 long_tidx;		/* index of "long", for array */
78 
79 
80 void		 cu_stat(void);
81 void		 cu_parse(struct dwcu *, struct itype_queue *,
82 		     struct ioff_tree *);
83 void		 cu_resolve(struct dwcu *, struct itype_queue *,
84 		     struct ioff_tree *);
85 void		 cu_reference(struct dwcu *, struct itype_queue *);
86 void		 cu_merge(struct dwcu *, struct itype_queue *);
87 
88 struct itype	*parse_base(struct dwdie *, size_t);
89 struct itype	*parse_refers(struct dwdie *, size_t, int);
90 struct itype	*parse_array(struct dwdie *, size_t);
91 struct itype	*parse_enum(struct dwdie *, size_t);
92 struct itype	*parse_struct(struct dwdie *, size_t, int, size_t);
93 struct itype	*parse_function(struct dwdie *, size_t);
94 struct itype	*parse_funcptr(struct dwdie *, size_t);
95 struct itype	*parse_variable(struct dwdie *, size_t);
96 
97 void		 subparse_subrange(struct dwdie *, size_t, struct itype *);
98 void		 subparse_enumerator(struct dwdie *, size_t, struct itype *);
99 void		 subparse_member(struct dwdie *, size_t, struct itype *, size_t);
100 void		 subparse_arguments(struct dwdie *, size_t, struct itype *);
101 
102 size_t		 dav2val(struct dwaval *, size_t);
103 const char	*dav2str(struct dwaval *);
104 const char	*enc2name(unsigned short);
105 
106 struct itype	*it_new(uint64_t, size_t, const char *, uint32_t, uint16_t,
107 		     uint64_t, uint16_t, unsigned int);
108 void		 it_merge(struct itype *, struct itype *);
109 void		 it_reference(struct itype *);
110 void		 it_free(struct itype *);
111 int		 it_cmp(struct itype *, struct itype *);
112 int		 it_name_cmp(struct itype *, struct itype *);
113 int		 it_off_cmp(struct itype *, struct itype *);
114 void		 ir_add(struct itype *, struct itype *);
115 void		 ir_purge(struct itype *);
116 struct imember	*im_new(const char *, size_t, size_t);
117 
118 RB_GENERATE(itype_tree, itype, it_node, it_cmp);
119 RB_GENERATE(isymb_tree, itype, it_node, it_name_cmp);
120 RB_GENERATE(ioff_tree, itype, it_node, it_off_cmp);
121 
122 /*
123  * Construct a list of internal type and functions based on DWARF
124  * INFO and ABBREV sections.
125  *
126  * Multiple CUs are supported.
127  */
128 void
129 dwarf_parse(const char *infobuf, size_t infolen, const char *abbuf,
130     size_t ablen)
131 {
132 	struct dwbuf		 info = { .buf = infobuf, .len = infolen };
133 	struct dwbuf		 abbrev = { .buf = abbuf, .len = ablen };
134 	struct dwcu		*dcu = NULL;
135 	struct ioff_tree	 cu_iofft;
136 	struct itype_queue	 cu_itypeq;
137 	struct itype		*it;
138 	int			 i;
139 
140 	for (i = 0; i < CTF_K_MAX; i++)
141 		RB_INIT(&itypet[i]);
142 	RB_INIT(&isymbt);
143 
144 	void_it = it_new(++tidx, VOID_OFFSET, "void", 0,
145 	    CTF_INT_SIGNED, 0, CTF_K_INTEGER, 0);
146 	TAILQ_INSERT_TAIL(&itypeq, void_it, it_next);
147 
148 	while (dw_cu_parse(&info, &abbrev, infolen, &dcu) == 0) {
149 		TAILQ_INIT(&cu_itypeq);
150 
151 		/* We use a tree to speed-up type resolution. */
152 		RB_INIT(&cu_iofft);
153 
154 		/* Parse this CU */
155 		cu_parse(dcu, &cu_itypeq, &cu_iofft);
156 
157 		/* Resolve its types. */
158 		cu_resolve(dcu, &cu_itypeq, &cu_iofft);
159 		assert(RB_EMPTY(&cu_iofft));
160 
161 		/* Mark used type as such. */
162 		cu_reference(dcu, &cu_itypeq);
163 
164 #ifdef DEBUG
165 		/* Dump statistics for current CU. */
166 		cu_stat();
167 #endif
168 
169 		/* Merge them with the common type list. */
170 		cu_merge(dcu, &cu_itypeq);
171 
172 		dw_dcu_free(dcu);
173 	}
174 
175 	/* We force array's index type to be 'long', for that we need its ID. */
176 	RB_FOREACH(it, itype_tree, &itypet[CTF_K_INTEGER]) {
177 		if (it_name(it) == NULL || it->it_size != (8 * sizeof(long)))
178 			continue;
179 
180 		if (strcmp(it_name(it), "unsigned") == 0) {
181 			long_tidx = it->it_idx;
182 			break;
183 		}
184 	}
185 }
186 
187 struct itype *
188 it_new(uint64_t index, size_t off, const char *name, uint32_t size,
189     uint16_t enc, uint64_t ref, uint16_t type, unsigned int flags)
190 {
191 	struct itype *it;
192 #ifndef NOPOOL
193 	static int it_pool_inited = 0;
194 
195 	if (!it_pool_inited) {
196 		pool_init(&it_pool, "it", 512, sizeof(struct itype));
197 		pool_init(&im_pool, "im", 1024, sizeof(struct imember));
198 		pool_init(&ir_pool, "ir", 1024, sizeof(struct itref));
199 		it_pool_inited = 1;
200 	}
201 #endif
202 
203 	assert((name != NULL) || !(flags & (ITF_FUNC|ITF_OBJ)));
204 
205 	it = pmalloc(&it_pool, sizeof(*it));
206 	SIMPLEQ_INIT(&it->it_refs);
207 	TAILQ_INIT(&it->it_members);
208 	it->it_off = off;
209 	it->it_ref = ref;
210 	it->it_refp = NULL;
211 	it->it_size = size;
212 	it->it_nelems = 0;
213 	it->it_enc = enc;
214 	it->it_idx = index;
215 	it->it_type = type;
216 	it->it_flags = flags;
217 
218 	if (name == NULL) {
219 		it->it_flags |= ITF_ANON;
220 	} else {
221 		size_t n;
222 
223 		if ((n = strlcpy(it->it_name, name, ITNAME_MAX)) > ITNAME_MAX)
224 			warnx("name %s too long %zd > %d", name, n, ITNAME_MAX);
225 	}
226 
227 	return it;
228 }
229 
230 struct itype *
231 it_dup(struct itype *it)
232 {
233 	struct imember *copim, *im;
234 	struct itype *copit;
235 
236 	copit = it_new(it->it_idx, it->it_off, it_name(it), it->it_size,
237 	    it->it_enc, it->it_ref, it->it_type, it->it_flags);
238 
239 	copit->it_refp = it->it_refp;
240 	copit->it_nelems = it->it_nelems;
241 
242 	TAILQ_FOREACH(im, &it->it_members, im_next) {
243 		copim = im_new(im_name(im), im->im_ref, im->im_off);
244 		copim->im_refp = im->im_refp;
245 		TAILQ_INSERT_TAIL(&copit->it_members, copim, im_next);
246 	}
247 
248 	return copit;
249 }
250 
251 /*
252  * Merge the content of ``it'', the full type declaration into the
253  * forwarding representation ``fwd''.
254  */
255 void
256 it_merge(struct itype *fwd, struct itype *it)
257 {
258 	assert(fwd->it_flags & ITF_FORWARD);
259 	assert(fwd->it_type == it->it_type);
260 	assert(TAILQ_EMPTY(&fwd->it_members));
261 	assert(SIMPLEQ_EMPTY(&it->it_refs));
262 
263 	fwd->it_off = it->it_off;
264 	fwd->it_ref = it->it_ref;
265 	fwd->it_refp = it->it_refp;
266 	fwd->it_size = it->it_size;
267 	fwd->it_nelems = it->it_nelems;
268 	fwd->it_enc = it->it_enc;
269 	fwd->it_flags = it->it_flags;
270 
271 	TAILQ_CONCAT(&fwd->it_members, &it->it_members, im_next);
272 	assert(TAILQ_EMPTY(&it->it_members));
273 }
274 
275 const char *
276 it_name(struct itype *it)
277 {
278 	if (!(it->it_flags & ITF_ANON))
279 		return it->it_name;
280 
281 	return NULL;
282 }
283 
284 void
285 it_reference(struct itype *it)
286 {
287 	struct imember *im;
288 
289 	if (it == NULL || it->it_flags & ITF_USED)
290 		return;
291 
292 	it->it_flags |= ITF_USED;
293 
294 	it_reference(it->it_refp);
295 	TAILQ_FOREACH(im, &it->it_members, im_next)
296 		it_reference(im->im_refp);
297 }
298 
299 void
300 it_free(struct itype *it)
301 {
302 	struct imember *im;
303 
304 	if (it == NULL)
305 		return;
306 
307 	while ((im = TAILQ_FIRST(&it->it_members)) != NULL) {
308 		TAILQ_REMOVE(&it->it_members, im, im_next);
309 		pfree(&im_pool, im);
310 	}
311 
312 	ir_purge(it);
313 	pfree(&it_pool, it);
314 }
315 
316 /*
317  * Return 0 if ``a'' matches ``b''.
318  */
319 int
320 it_cmp(struct itype *a, struct itype *b)
321 {
322 	int diff;
323 
324 	if ((diff = (a->it_type - b->it_type)) != 0)
325 		return diff;
326 
327 	/* Basic types need to have the same size. */
328 	if ((a->it_type == CTF_K_INTEGER || a->it_type == CTF_K_FLOAT) &&
329 	    (diff = (a->it_size - b->it_size) != 0))
330 		return diff;
331 
332 	/* Match by name */
333 	if (!(a->it_flags & ITF_ANON) && !(b->it_flags & ITF_ANON))
334 		return strcmp(it_name(a), it_name(b));
335 
336 	/* Only one of them is anonym */
337 	if ((a->it_flags & ITF_ANON) != (b->it_flags & ITF_ANON))
338 		return (a->it_flags & ITF_ANON) ? -1 : 1;
339 
340 	/* Match by reference */
341 	if ((a->it_refp != NULL) && (b->it_refp != NULL))
342 		return it_cmp(a->it_refp, b->it_refp);
343 
344 	return 1;
345 }
346 
347 int
348 it_name_cmp(struct itype *a, struct itype *b)
349 {
350 	int diff;
351 
352 	if ((diff = strcmp(it_name(a), it_name(b))) != 0)
353 		return diff;
354 
355 	return ((a->it_flags|ITF_MASK) - (b->it_flags|ITF_MASK));
356 }
357 
358 int
359 it_off_cmp(struct itype *a, struct itype *b)
360 {
361 	return a->it_off - b->it_off;
362 }
363 
364 void
365 ir_add(struct itype *it, struct itype *tmp)
366 {
367 	struct itref *ir;
368 
369 	SIMPLEQ_FOREACH(ir, &tmp->it_refs, ir_next) {
370 		if (ir->ir_itp == it)
371 			return;
372 	}
373 
374 	ir = pmalloc(&ir_pool, sizeof(*ir));
375 	ir->ir_itp = it;
376 	SIMPLEQ_INSERT_TAIL(&tmp->it_refs, ir, ir_next);
377 }
378 
379 void
380 ir_purge(struct itype *it)
381 {
382 	struct itref *ir;
383 
384 	while ((ir = SIMPLEQ_FIRST(&it->it_refs)) != NULL) {
385 		SIMPLEQ_REMOVE_HEAD(&it->it_refs, ir_next);
386 		pfree(&ir_pool, ir);
387 	}
388 }
389 
390 struct imember *
391 im_new(const char *name, size_t ref, size_t off)
392 {
393 	struct imember *im;
394 
395 	im = pmalloc(&im_pool, sizeof(*im));
396 	im->im_ref = ref;
397 	im->im_off = off;
398 	im->im_refp = NULL;
399 	if (name == NULL) {
400 		im->im_flags = IMF_ANON;
401 	} else {
402 		size_t n;
403 
404 		n = strlcpy(im->im_name, name, ITNAME_MAX);
405 		if (n > ITNAME_MAX)
406 			warnx("name %s too long %zd > %d", name, n,
407 			    ITNAME_MAX);
408 		im->im_flags = 0;
409 	}
410 
411 	return im;
412 }
413 
414 const char *
415 im_name(struct imember *im)
416 {
417 	if (!(im->im_flags & IMF_ANON))
418 		return im->im_name;
419 
420 	return NULL;
421 }
422 
423 void
424 cu_stat(void)
425 {
426 #ifndef NOPOOL
427 	pool_dump();
428 #endif
429 }
430 
431 /*
432  * Iterate over all types found in a given CU.  For all non-resolved types
433  * use their DWARF relative offset to find the relative type they are pointing
434  * to.  The CU offset tree, `cuot', is used to speedup relative type lookup.
435  */
436 void
437 cu_resolve(struct dwcu *dcu, struct itype_queue *cutq, struct ioff_tree *cuot)
438 {
439 	struct itype	*it, *ref, tmp;
440 	struct imember	*im;
441 	unsigned int	 toresolve;
442 	size_t		 off = dcu->dcu_offset;
443 
444 	TAILQ_FOREACH(it, cutq, it_next) {
445 		if (!(it->it_flags & (ITF_UNRES|ITF_UNRES_MEMB)))
446 			continue;
447 
448 		/* If this type references another one, try to find it. */
449 		if (it->it_flags & ITF_UNRES) {
450 			tmp.it_off = it->it_ref + off;
451 			ref = RB_FIND(ioff_tree, cuot, &tmp);
452 			if (ref != NULL) {
453 				it->it_refp = ref;
454 				ir_add(it, ref);
455 				it->it_flags &= ~ITF_UNRES;
456 			}
457 		}
458 
459 		/* If this type has members, resolve all of them. */
460 		toresolve = it->it_nelems;
461 		if ((it->it_flags & ITF_UNRES_MEMB) && toresolve > 0) {
462 			TAILQ_FOREACH(im, &it->it_members, im_next) {
463 				tmp.it_off = im->im_ref + off;
464 				ref = RB_FIND(ioff_tree, cuot, &tmp);
465 				if (ref != NULL) {
466 					im->im_refp = ref;
467 					ir_add(it, ref);
468 					toresolve--;
469 				}
470 			}
471 			if (toresolve == 0)
472 				it->it_flags &= ~ITF_UNRES_MEMB;
473 		}
474 #if defined(DEBUG)
475 		if (it->it_flags & (ITF_UNRES|ITF_UNRES_MEMB)) {
476 			printf("0x%zx: %s type=%d unresolved 0x%llx",
477 			    it->it_off, it_name(it), it->it_type, it->it_ref);
478 			if (toresolve)
479 				printf(": %d members", toresolve);
480 			TAILQ_FOREACH(im, &it->it_members, im_next) {
481 				if (im->im_refp != NULL)
482 					continue;
483 				printf("\n%zu: %s", im->im_ref, im_name(im));
484 			}
485 			printf("\n");
486 		}
487 #endif /* defined(DEBUG) */
488 	}
489 
490 	/* We'll reuse the tree for the next CU, so empty it. */
491 	RB_FOREACH_SAFE(it, ioff_tree, cuot, ref)
492 		RB_REMOVE(ioff_tree, cuot, it);
493 }
494 
495 void
496 cu_reference(struct dwcu *dcu, struct itype_queue *cutq)
497 {
498 	struct itype *it;
499 
500 	TAILQ_FOREACH(it, cutq, it_next) {
501 		if (it->it_flags & (ITF_OBJ|ITF_FUNC))
502 			it_reference(it);
503 	}
504 }
505 
506 /*
507  * Merge type representation from a CU with already known types.
508  */
509 void
510 cu_merge(struct dwcu *dcu, struct itype_queue *cutq)
511 {
512 	struct itype *it, *nit, *prev, *first;
513 	int diff;
514 
515 	/* First ``it'' that needs a duplicate check. */
516 	first = TAILQ_FIRST(cutq);
517 	if (first == NULL)
518 		return;
519 
520 	TAILQ_CONCAT(&itypeq, cutq, it_next);
521 
522 	/*
523 	 * First pass: merge types
524 	 */
525 	for (it = first; it != NULL; it = nit) {
526 		nit = TAILQ_NEXT(it, it_next);
527 
528 		/* Move functions & variable to their own list. */
529 		if (it->it_flags & (ITF_FUNC|ITF_OBJ)) {
530 			/*
531 			 * FIXME: allow static variables with the same name
532 			 * to be of different type.
533 			 */
534 			if (RB_FIND(isymb_tree, &isymbt, it) == NULL)
535 				RB_INSERT(isymb_tree, &isymbt, it);
536 			continue;
537 		}
538 
539 		/* Look if we already have this type. */
540 		if (it->it_flags & ITF_USED)
541 			prev = RB_FIND(itype_tree, &itypet[it->it_type], it);
542 		else
543 			prev = NULL;
544 
545 		if (prev != NULL) {
546 			struct itype *old = it;
547 			struct itref *ir;
548 			struct imember *im;
549 
550 			/* Substitute references */
551 			while ((ir = SIMPLEQ_FIRST(&old->it_refs)) != NULL) {
552 				it = ir->ir_itp;
553 
554 				SIMPLEQ_REMOVE_HEAD(&old->it_refs, ir_next);
555 				pfree(&ir_pool, ir);
556 
557 				if (it->it_refp == old)
558 					it->it_refp = prev;
559 
560 				TAILQ_FOREACH(im, &it->it_members, im_next) {
561 					if (im->im_refp == old)
562 						im->im_refp = prev;
563 				}
564 			}
565 
566 			/* If we first got a forward reference, complete it. */
567 			if ((prev->it_flags & ITF_FORWARD) &&
568 			    (old->it_flags & ITF_FORWARD) == 0)
569 			    	it_merge(prev, old);
570 
571 			old->it_flags &= ~ITF_USED;
572 		} else if (it->it_flags & ITF_USED) {
573 			RB_INSERT(itype_tree, &itypet[it->it_type], it);
574 		}
575 	}
576 
577 	/*
578 	 * Second pass: update indexes
579 	 */
580 	diff = 0;
581 	for (it = first; it != NULL; it = nit) {
582 		nit = TAILQ_NEXT(it, it_next);
583 
584 		if (it->it_flags & (ITF_FUNC|ITF_OBJ))
585 			continue;
586 
587 		/* Adjust indexes */
588 		if (it->it_flags & ITF_USED) {
589 			it->it_idx -= diff;
590 			continue;
591 		}
592 
593 		/* Remove unused */
594 		TAILQ_REMOVE(&itypeq, it, it_next);
595 		it_free(it);
596 		diff++;
597 	}
598 
599 	/* Update global index to match removed entries. */
600 	it = TAILQ_LAST(&itypeq, itype_queue);
601 	while (it->it_flags & (ITF_FUNC|ITF_OBJ))
602 		it = TAILQ_PREV(it, itype_queue, it_next);
603 
604 	tidx = it->it_idx;
605 }
606 
607 /*
608  * Parse a CU.
609  */
610 void
611 cu_parse(struct dwcu *dcu, struct itype_queue *cutq, struct ioff_tree *cuot)
612 {
613 	struct itype *it = NULL;
614 	struct dwdie *die;
615 	size_t psz = dcu->dcu_psize;
616 	size_t off = dcu->dcu_offset;
617 
618 	assert(RB_EMPTY(cuot));
619 
620 	SIMPLEQ_FOREACH(die, &dcu->dcu_dies, die_next) {
621 		uint64_t tag = die->die_dab->dab_tag;
622 
623 		switch (tag) {
624 		case DW_TAG_array_type:
625 			it = parse_array(die, dcu->dcu_psize);
626 			break;
627 		case DW_TAG_enumeration_type:
628 			it = parse_enum(die, dcu->dcu_psize);
629 			break;
630 		case DW_TAG_pointer_type:
631 			it = parse_refers(die, psz, CTF_K_POINTER);
632 			break;
633 		case DW_TAG_structure_type:
634 			it = parse_struct(die, psz, CTF_K_STRUCT, off);
635 			if (it == NULL)
636 				continue;
637 			break;
638 		case DW_TAG_typedef:
639 			it = parse_refers(die, psz, CTF_K_TYPEDEF);
640 			break;
641 		case DW_TAG_union_type:
642 			it = parse_struct(die, psz, CTF_K_UNION, off);
643 			if (it == NULL)
644 				continue;
645 			break;
646 		case DW_TAG_base_type:
647 			it = parse_base(die, psz);
648 			if (it == NULL)
649 				continue;
650 			break;
651 		case DW_TAG_const_type:
652 			it = parse_refers(die, psz, CTF_K_CONST);
653 			break;
654 		case DW_TAG_volatile_type:
655 			it = parse_refers(die, psz, CTF_K_VOLATILE);
656 			break;
657 		case DW_TAG_restrict_type:
658 			it = parse_refers(die, psz, CTF_K_RESTRICT);
659 			break;
660 		case DW_TAG_subprogram:
661 			it = parse_function(die, psz);
662 			if (it == NULL)
663 				continue;
664 			break;
665 		case DW_TAG_subroutine_type:
666 			it = parse_funcptr(die, psz);
667 			break;
668 		/*
669 		 * Children are assumed to be right after their parent in
670 		 * the list.  The parent parsing function takes care of
671 		 * parsing them.
672 		 */
673 		 case DW_TAG_member:
674 			 assert(it->it_type == CTF_K_STRUCT ||
675 			    it->it_type == CTF_K_UNION ||
676 			    it->it_type == CTF_K_ENUM);
677 			continue;
678 		 case DW_TAG_subrange_type:
679 			assert(it->it_type == CTF_K_ARRAY);
680 			continue;
681 		case DW_TAG_formal_parameter:
682 			/*
683 			 * If we skipped the second inline definition,
684 			 * skip its arguments.
685 			 */
686 			if (it == NULL)
687 				continue;
688 
689 			/* See comment in subparse_arguments(). */
690 			if (it->it_type == CTF_K_STRUCT ||
691 			    it->it_type == CTF_K_UNION ||
692 			    it->it_type == CTF_K_ENUM ||
693 			    it->it_type == CTF_K_TYPEDEF)
694 				continue;
695 
696 			if (it->it_flags & ITF_OBJ)
697 				continue;
698 
699 			assert(it->it_type == CTF_K_FUNCTION);
700 			continue;
701 		case DW_TAG_variable:
702 			it = parse_variable(die, psz);
703 			/* Unnamed variables are discarded. */
704 			if (it == NULL)
705 				continue;
706 			break;
707 #if 1
708 		case DW_TAG_lexical_block:
709 		case DW_TAG_inlined_subroutine:
710 			continue;
711 #endif
712 		case DW_TAG_compile_unit:
713 		default:
714 			DPRINTF("%s\n", dw_tag2name(tag));
715 			continue;
716 		}
717 
718 		TAILQ_INSERT_TAIL(cutq, it, it_next);
719 		RB_INSERT(ioff_tree, cuot, it);
720 	}
721 }
722 
723 struct itype *
724 parse_base(struct dwdie *die, size_t psz)
725 {
726 	struct itype *it;
727 	struct dwaval *dav;
728 	uint16_t encoding, enc = 0, bits = 0;
729 	int type;
730 
731 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
732 		switch (dav->dav_dat->dat_attr) {
733 		case DW_AT_encoding:
734 			enc = dav2val(dav, psz);
735 			break;
736 		case DW_AT_byte_size:
737 			bits = 8 * dav2val(dav, psz);
738 			break;
739 		default:
740 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
741 			break;
742 		}
743 	}
744 
745 	switch (enc) {
746 	case DW_ATE_unsigned:
747 	case DW_ATE_address:
748 		encoding = 0;
749 		type = CTF_K_INTEGER;
750 		break;
751 	case DW_ATE_unsigned_char:
752 		encoding = CTF_INT_CHAR;
753 		type = CTF_K_INTEGER;
754 		break;
755 	case DW_ATE_signed:
756 		encoding = CTF_INT_SIGNED;
757 		type = CTF_K_INTEGER;
758 		break;
759 	case DW_ATE_signed_char:
760 		encoding = CTF_INT_SIGNED | CTF_INT_CHAR;
761 		type = CTF_K_INTEGER;
762 		break;
763 	case DW_ATE_boolean:
764 		encoding = CTF_INT_SIGNED | CTF_INT_BOOL;
765 		type = CTF_K_INTEGER;
766 		break;
767 	case DW_ATE_float:
768 		if (bits < psz)
769 			encoding = CTF_FP_SINGLE;
770 		else if (bits == psz)
771 			encoding = CTF_FP_DOUBLE;
772 		else
773 			encoding = CTF_FP_LDOUBLE;
774 		type = CTF_K_FLOAT;
775 		break;
776 	case DW_ATE_complex_float:
777 		if (bits < psz)
778 			encoding = CTF_FP_CPLX;
779 		else if (bits == psz)
780 			encoding = CTF_FP_DCPLX;
781 		else
782 			encoding = CTF_FP_LDCPLX;
783 		type = CTF_K_FLOAT;
784 		break;
785 	case DW_ATE_imaginary_float:
786 		if (bits < psz)
787 			encoding = CTF_FP_IMAGRY;
788 		else if (bits == psz)
789 			encoding = CTF_FP_DIMAGRY;
790 		else
791 			encoding = CTF_FP_LDIMAGRY;
792 		type = CTF_K_FLOAT;
793 		break;
794 	default:
795 		DPRINTF("unknown encoding: %d\n", enc);
796 		return (NULL);
797 	}
798 
799 	it = it_new(++tidx, die->die_offset, enc2name(enc), bits,
800 	    encoding, 0, type, 0);
801 
802 	return it;
803 }
804 
805 struct itype *
806 parse_refers(struct dwdie *die, size_t psz, int type)
807 {
808 	struct itype *it;
809 	struct dwaval *dav;
810 	const char *name = NULL;
811 	size_t ref = 0, size = 0;
812 
813 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
814 		switch (dav->dav_dat->dat_attr) {
815 		case DW_AT_name:
816 			name = dav2str(dav);
817 			break;
818 		case DW_AT_type:
819 			ref = dav2val(dav, psz);
820 			break;
821 		case DW_AT_byte_size:
822 			size = dav2val(dav, psz);
823 			assert(size < UINT_MAX);
824 			break;
825 		default:
826 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
827 			break;
828 		}
829 	}
830 
831 	it = it_new(++tidx, die->die_offset, name, size, 0, ref, type,
832 	    ITF_UNRES);
833 
834 	if (it->it_ref == 0 && (it->it_size == sizeof(void *) ||
835 	    type == CTF_K_CONST || type == CTF_K_VOLATILE ||
836 	    type == CTF_K_POINTER)) {
837 		/* Work around GCC/clang not emiting a type for void */
838 		it->it_flags &= ~ITF_UNRES;
839 		it->it_ref = VOID_OFFSET;
840 		it->it_refp = void_it;
841 	}
842 
843 	return it;
844 }
845 
846 struct itype *
847 parse_array(struct dwdie *die, size_t psz)
848 {
849 	struct itype *it;
850 	struct dwaval *dav;
851 	const char *name = NULL;
852 	size_t ref = 0;
853 
854 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
855 		switch (dav->dav_dat->dat_attr) {
856 		case DW_AT_name:
857 			name = dav2str(dav);
858 			break;
859 		case DW_AT_type:
860 			ref = dav2val(dav, psz);
861 			break;
862 		default:
863 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
864 			break;
865 		}
866 	}
867 
868 	it = it_new(++tidx, die->die_offset, name, 0, 0, ref, CTF_K_ARRAY,
869 	    ITF_UNRES);
870 
871 	subparse_subrange(die, psz, it);
872 
873 	return it;
874 }
875 
876 struct itype *
877 parse_enum(struct dwdie *die, size_t psz)
878 {
879 	struct itype *it;
880 	struct dwaval *dav;
881 	const char *name = NULL;
882 	size_t size = 0;
883 
884 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
885 		switch (dav->dav_dat->dat_attr) {
886 		case DW_AT_byte_size:
887 			size = dav2val(dav, psz);
888 			assert(size < UINT_MAX);
889 			break;
890 		case DW_AT_name:
891 			name = dav2str(dav);
892 			break;
893 		default:
894 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
895 			break;
896 		}
897 	}
898 
899 	it = it_new(++tidx, die->die_offset, name, size, 0, 0, CTF_K_ENUM, 0);
900 
901 	subparse_enumerator(die, psz, it);
902 
903 	return it;
904 }
905 
906 void
907 subparse_subrange(struct dwdie *die, size_t psz, struct itype *it)
908 {
909 	struct dwaval *dav;
910 
911 	assert(it->it_type == CTF_K_ARRAY);
912 
913 	if (die->die_dab->dab_children == DW_CHILDREN_no)
914 		return;
915 
916 	/*
917 	 * This loop assumes that the children of a DIE are just
918 	 * after it on the list.
919 	 */
920 	while ((die = SIMPLEQ_NEXT(die, die_next)) != NULL) {
921 		uint64_t tag = die->die_dab->dab_tag;
922 		size_t nelems = 0;
923 
924 		if (tag != DW_TAG_subrange_type)
925 			break;
926 
927 		SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
928 			switch (dav->dav_dat->dat_attr) {
929 			case DW_AT_count:
930 				nelems = dav2val(dav, psz);
931 				break;
932 			case DW_AT_upper_bound:
933 				nelems = dav2val(dav, psz) + 1;
934 				break;
935 			default:
936 				DPRINTF("%s\n",
937 				    dw_at2name(dav->dav_dat->dat_attr));
938 				break;
939 			}
940 		}
941 
942 		assert(nelems < UINT_MAX);
943 		it->it_nelems = nelems;
944 	}
945 }
946 
947 void
948 subparse_enumerator(struct dwdie *die, size_t psz, struct itype *it)
949 {
950 	struct imember *im;
951 	struct dwaval *dav;
952 
953 	assert(it->it_type == CTF_K_ENUM);
954 
955 	if (die->die_dab->dab_children == DW_CHILDREN_no)
956 		return;
957 
958 	/*
959 	 * This loop assumes that the children of a DIE are just
960 	 * after it on the list.
961 	 */
962 	while ((die = SIMPLEQ_NEXT(die, die_next)) != NULL) {
963 		uint64_t tag = die->die_dab->dab_tag;
964 		size_t val = 0;
965 		const char *name = NULL;
966 
967 		if (tag != DW_TAG_enumerator)
968 			break;
969 
970 		SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
971 			switch (dav->dav_dat->dat_attr) {
972 			case DW_AT_name:
973 				name = dav2str(dav);
974 				break;
975 			case DW_AT_const_value:
976 				val = dav2val(dav, psz);
977 				break;
978 			default:
979 				DPRINTF("%s\n",
980 				    dw_at2name(dav->dav_dat->dat_attr));
981 				break;
982 			}
983 		}
984 
985 		if (name == NULL) {
986 			warnx("%s with anon member", it_name(it));
987 			continue;
988 		}
989 
990 		im = im_new(name, val, 0);
991 		assert(it->it_nelems < UINT_MAX);
992 		it->it_nelems++;
993 		TAILQ_INSERT_TAIL(&it->it_members, im, im_next);
994 	}
995 }
996 
997 struct itype *
998 parse_struct(struct dwdie *die, size_t psz, int type, size_t off)
999 {
1000 	struct itype *it = NULL;
1001 	struct dwaval *dav;
1002 	const char *name = NULL;
1003 	unsigned int flags = 0;
1004 	size_t size = 0;
1005 	int forward = 0;
1006 
1007 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
1008 		switch (dav->dav_dat->dat_attr) {
1009 		case DW_AT_declaration:
1010 			forward = dav2val(dav, psz);
1011 			break;
1012 		case DW_AT_byte_size:
1013 			size = dav2val(dav, psz);
1014 			assert(size < UINT_MAX);
1015 			break;
1016 		case DW_AT_name:
1017 			name = dav2str(dav);
1018 			break;
1019 		default:
1020 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
1021 			break;
1022 		}
1023 	}
1024 
1025 
1026 	if (forward)
1027 		flags = ITF_FORWARD;
1028 	it = it_new(++tidx, die->die_offset, name, size, 0, 0, type, flags);
1029 	subparse_member(die, psz, it, off);
1030 
1031 	return it;
1032 }
1033 
1034 void
1035 subparse_member(struct dwdie *die, size_t psz, struct itype *it, size_t offset)
1036 {
1037 	struct imember *im;
1038 	struct dwaval *dav;
1039 	const char *name;
1040 	size_t off = 0, ref = 0, bits = 0;
1041 	uint8_t lvl = die->die_lvl;
1042 
1043 	assert(it->it_type == CTF_K_STRUCT || it->it_type == CTF_K_UNION);
1044 
1045 	if (die->die_dab->dab_children == DW_CHILDREN_no)
1046 		return;
1047 
1048 	/*
1049 	 * This loop assumes that the children of a DIE are just
1050 	 * after it on the list.
1051 	 */
1052 	while ((die = SIMPLEQ_NEXT(die, die_next)) != NULL) {
1053 		int64_t tag = die->die_dab->dab_tag;
1054 
1055 		name = NULL;
1056 		if (die->die_lvl <= lvl)
1057 			break;
1058 
1059 		/* Skip members of members */
1060 		if (die->die_lvl > lvl + 1)
1061 			continue;
1062 		/*
1063 		 * Nested declaration.
1064 		 *
1065 		 * This matches the case where a ``struct'', ``union'',
1066 		 * ``enum'' or ``typedef'' is first declared "inside" a
1067 		 * union or struct declaration.
1068 		 */
1069 		if (tag == DW_TAG_structure_type || tag == DW_TAG_union_type ||
1070 		    tag == DW_TAG_enumeration_type || tag == DW_TAG_typedef)
1071 			continue;
1072 
1073 		it->it_flags |= ITF_UNRES_MEMB;
1074 
1075 		SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
1076 			switch (dav->dav_dat->dat_attr) {
1077 			case DW_AT_name:
1078 				name = dav2str(dav);
1079 				break;
1080 			case DW_AT_type:
1081 				ref = dav2val(dav, psz);
1082 				break;
1083 			case DW_AT_data_member_location:
1084 				off = 8 * dav2val(dav, psz);
1085 				break;
1086 			case DW_AT_bit_size:
1087 				bits = dav2val(dav, psz);
1088 				assert(bits < USHRT_MAX);
1089 				break;
1090 			default:
1091 				DPRINTF("%s\n",
1092 				    dw_at2name(dav->dav_dat->dat_attr));
1093 				break;
1094 			}
1095 		}
1096 
1097 		/*
1098 		 * When a structure is declared inside an union, we
1099 		 * have to generate a reference to make the resolver
1100 		 * happy.
1101 		 */
1102 		if ((ref == 0) && (tag == DW_TAG_structure_type))
1103 			ref = die->die_offset - offset;
1104 
1105 		im = im_new(name, ref, off);
1106 		assert(it->it_nelems < UINT_MAX);
1107 		it->it_nelems++;
1108 		TAILQ_INSERT_TAIL(&it->it_members, im, im_next);
1109 	}
1110 }
1111 
1112 
1113 void
1114 subparse_arguments(struct dwdie *die, size_t psz, struct itype *it)
1115 {
1116 	struct imember *im;
1117 	struct dwaval *dav;
1118 	size_t ref = 0;
1119 
1120 	assert(it->it_type == CTF_K_FUNCTION);
1121 
1122 	if (die->die_dab->dab_children == DW_CHILDREN_no)
1123 		return;
1124 
1125 	/*
1126 	 * This loop assumes that the children of a DIE are after it
1127 	 * on the list.
1128 	 */
1129 	while ((die = SIMPLEQ_NEXT(die, die_next)) != NULL) {
1130 		uint64_t tag = die->die_dab->dab_tag;
1131 
1132 		if (tag == DW_TAG_unspecified_parameters) {
1133 			/* TODO */
1134 			continue;
1135 		}
1136 
1137 		/*
1138 		 * Nested declaration.
1139 		 *
1140 		 * This matches the case where a ``struct'', ``union'',
1141 		 * ``enum'', ``typedef'' or ``static'' variable is first
1142 		 * declared inside a function declaration.
1143 		 */
1144 		switch (tag) {
1145 		case DW_TAG_structure_type:
1146 		case DW_TAG_union_type:
1147 		case DW_TAG_enumeration_type:
1148 		case DW_TAG_typedef:
1149 		case DW_TAG_variable:
1150 			continue;
1151 		default:
1152 			break;
1153 		}
1154 
1155 		if (tag != DW_TAG_formal_parameter)
1156 			break;
1157 
1158 		it->it_flags |= ITF_UNRES_MEMB;
1159 
1160 		SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
1161 			switch (dav->dav_dat->dat_attr) {
1162 			case DW_AT_type:
1163 				ref = dav2val(dav, psz);
1164 				break;
1165 			default:
1166 				DPRINTF("%s\n",
1167 				    dw_at2name(dav->dav_dat->dat_attr));
1168 				break;
1169 			}
1170 		}
1171 
1172 		im = im_new(NULL, ref, 0);
1173 		assert(it->it_nelems < UINT_MAX);
1174 		it->it_nelems++;
1175 		TAILQ_INSERT_TAIL(&it->it_members, im, im_next);
1176 	}
1177 }
1178 
1179 struct itype *
1180 parse_function(struct dwdie *die, size_t psz)
1181 {
1182 	struct itype *it;
1183 	struct dwaval *dav;
1184 	const char *name = NULL;
1185 	size_t ref = 0;
1186 
1187 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
1188 		switch (dav->dav_dat->dat_attr) {
1189 		case DW_AT_name:
1190 			name = dav2str(dav);
1191 			break;
1192 		case DW_AT_type:
1193 			ref = dav2val(dav, psz);
1194 			break;
1195 		case DW_AT_abstract_origin:
1196 			/*
1197 			 * Skip second empty definition for inline
1198 			 * functions.
1199 			 */
1200 			return NULL;
1201 		default:
1202 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
1203 			break;
1204 		}
1205 	}
1206 
1207 	/*
1208 	 * Work around for clang 4.0 generating DW_TAG_subprogram without
1209 	 * any attribute.
1210 	 */
1211 	if (name == NULL)
1212 		return NULL;
1213 
1214 	it = it_new(++fidx, die->die_offset, name, 0, 0, ref, CTF_K_FUNCTION,
1215 	    ITF_UNRES|ITF_FUNC);
1216 
1217 	subparse_arguments(die, psz, it);
1218 
1219 	if (it->it_ref == 0) {
1220 		/* Work around GCC not emiting a type for void */
1221 		it->it_flags &= ~ITF_UNRES;
1222 		it->it_ref = VOID_OFFSET;
1223 		it->it_refp = void_it;
1224 	}
1225 
1226 	return it;
1227 }
1228 
1229 struct itype *
1230 parse_funcptr(struct dwdie *die, size_t psz)
1231 {
1232 	struct itype *it;
1233 	struct dwaval *dav;
1234 	const char *name = NULL;
1235 	size_t ref = 0;
1236 
1237 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
1238 		switch (dav->dav_dat->dat_attr) {
1239 		case DW_AT_name:
1240 			name = dav2str(dav);
1241 			break;
1242 		case DW_AT_type:
1243 			ref = dav2val(dav, psz);
1244 			break;
1245 		default:
1246 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
1247 			break;
1248 		}
1249 	}
1250 
1251 	it = it_new(++tidx, die->die_offset, name, 0, 0, ref, CTF_K_FUNCTION,
1252 	    ITF_UNRES);
1253 
1254 	subparse_arguments(die, psz, it);
1255 
1256 	if (it->it_ref == 0) {
1257 		/* Work around GCC not emiting a type for void */
1258 		it->it_flags &= ~ITF_UNRES;
1259 		it->it_ref = VOID_OFFSET;
1260 		it->it_refp = void_it;
1261 	}
1262 
1263 	return it;
1264 }
1265 
1266 struct itype *
1267 parse_variable(struct dwdie *die, size_t psz)
1268 {
1269 	struct itype *it = NULL;
1270 	struct dwaval *dav;
1271 	const char *name = NULL;
1272 	size_t ref = 0;
1273 	int forward = 0, global = 0;
1274 
1275 	SIMPLEQ_FOREACH(dav, &die->die_avals, dav_next) {
1276 		switch (dav->dav_dat->dat_attr) {
1277 		case DW_AT_declaration:
1278 			forward = dav2val(dav, psz);
1279 			break;
1280 		case DW_AT_name:
1281 			name = dav2str(dav);
1282 			break;
1283 		case DW_AT_type:
1284 			ref = dav2val(dav, psz);
1285 			break;
1286 		case DW_AT_location:
1287 			switch (dav->dav_dat->dat_form) {
1288 			case DW_FORM_block:
1289 			case DW_FORM_block1:
1290 			case DW_FORM_block2:
1291 			case DW_FORM_block4:
1292 				global = 1;
1293 				break;
1294 			default:
1295 				break;
1296 			}
1297 			break;
1298 		default:
1299 			DPRINTF("%s\n", dw_at2name(dav->dav_dat->dat_attr));
1300 			break;
1301 		}
1302 	}
1303 
1304 
1305 	if (global && !forward && name != NULL) {
1306 		it = it_new(++oidx, die->die_offset, name, 0, 0, ref, 0,
1307 		    ITF_UNRES|ITF_OBJ);
1308 	}
1309 
1310 	return it;
1311 }
1312 
1313 size_t
1314 dav2val(struct dwaval *dav, size_t psz)
1315 {
1316 	uint64_t val = (uint64_t)-1;
1317 
1318 	switch (dav->dav_dat->dat_form) {
1319 	case DW_FORM_addr:
1320 	case DW_FORM_ref_addr:
1321 		if (psz == sizeof(uint32_t))
1322 			val = dav->dav_u32;
1323 		else
1324 			val = dav->dav_u64;
1325 		break;
1326 	case DW_FORM_block1:
1327 	case DW_FORM_block2:
1328 	case DW_FORM_block4:
1329 	case DW_FORM_block:
1330 		dw_loc_parse(&dav->dav_buf, NULL, &val, NULL);
1331 		break;
1332 	case DW_FORM_flag:
1333 	case DW_FORM_data1:
1334 	case DW_FORM_ref1:
1335 		val = dav->dav_u8;
1336 		break;
1337 	case DW_FORM_data2:
1338 	case DW_FORM_ref2:
1339 		val = dav->dav_u16;
1340 		break;
1341 	case DW_FORM_data4:
1342 	case DW_FORM_ref4:
1343 		val = dav->dav_u32;
1344 		break;
1345 	case DW_FORM_sdata:
1346 	case DW_FORM_data8:
1347 	case DW_FORM_ref8:
1348 		val = dav->dav_u64;
1349 		break;
1350 	case DW_FORM_strp:
1351 		val = dav->dav_u32;
1352 		break;
1353 	case DW_FORM_flag_present:
1354 		val = 1;
1355 		break;
1356 	default:
1357 		break;
1358 	}
1359 
1360 	return val;
1361 }
1362 
1363 const char *
1364 dav2str(struct dwaval *dav)
1365 {
1366 	const char *str = NULL;
1367 	extern const char *dstrbuf;
1368 	extern size_t dstrlen;
1369 
1370 	switch (dav->dav_dat->dat_form) {
1371 	case DW_FORM_string:
1372 		str = dav->dav_str;
1373 		break;
1374 	case DW_FORM_strp:
1375 		if (dav->dav_u32 >= dstrlen)
1376 			str = NULL;
1377 		else
1378 			str = dstrbuf + dav->dav_u32;
1379 		break;
1380 	default:
1381 		break;
1382 	}
1383 
1384 	return str;
1385 }
1386 
1387 const char *
1388 enc2name(unsigned short enc)
1389 {
1390 	static const char *enc_name[] = { "address", "boolean", "complex float",
1391 	    "float", "signed", "char", "unsigned", "unsigned char",
1392 	    "imaginary float", "packed decimal", "numeric string", "edited",
1393 	    "signed fixed", "unsigned fixed", "decimal float" };
1394 
1395 	if (enc > 0 && enc <= nitems(enc_name))
1396 		return enc_name[enc - 1];
1397 
1398 	return "invalid";
1399 }
1400