1 /* radare - LGPL - Copyright 2012-2020 - houndthe */
2 
3 #include "base_types.h"
4 #include <sdb.h>
5 #include <r_anal.h>
6 #include <r_bin_dwarf.h>
7 #include <string.h>
8 
9 typedef struct dwarf_parse_context_t {
10 	const RAnal *anal;
11 	const RBinDwarfDie *all_dies;
12 	const ut64 count;
13 	Sdb *sdb;
14 	HtUP/*<ut64 offset, DwarfDie *die>*/ *die_map;
15 	HtUP/*<offset, RBinDwarfLocList*>*/  *locations;
16 	char *lang; // for demangling
17 } Context;
18 
19 typedef struct dwarf_function_t {
20 	ut64 addr;
21 	const char *name;
22 	const char *signature;
23 	bool is_external;
24 	bool is_method;
25 	bool is_virtual;
26 	bool is_trampoline; // intermediary in making call to another func
27 	ut8 access; // public = 1, protected = 2, private = 3, if not set assume private
28 	ut64 vtable_addr; // location description
29 	ut64 call_conv; // normal || program || nocall
30 } Function;
31 
32 typedef enum dwarf_location_kind {
33 	LOCATION_UNKNOWN = 0,
34 	LOCATION_GLOBAL = 1,
35 	LOCATION_BP = 2,
36 	LOCATION_SP = 3,
37 	LOCATION_REGISTER = 4,
38 } VariableLocationKind;
39 typedef struct dwarf_var_location_t {
40 	VariableLocationKind kind;
41 	ut64 address;
42 	ut64 reg_num;
43 	st64 offset;
44 	const char *reg_name; /* string literal */
45 } VariableLocation;
46 
47 typedef struct dwarf_variable_t {
48 	VariableLocation *location;
49 	char *name;
50 	char *type;
51 } Variable;
52 
/**
 * @brief Releases a Variable together with the name, location and type it owns
 *
 * @param var variable to free, NULL is accepted and ignored (same contract as free ())
 */
static void variable_free(Variable *var) {
	if (!var) {
		return;
	}
	free (var->name);
	free (var->location);
	free (var->type);
	free (var);
}
59 
60 /* return -1 if attr isn't found */
find_attr_idx(const RBinDwarfDie * die,st32 attr_name)61 static inline st32 find_attr_idx(const RBinDwarfDie *die, st32 attr_name) {
62 	st32 i;
63 	r_return_val_if_fail (die, -1);
64 	for (i = 0; i < die->count; i++) {
65 		if (die->attr_values[i].attr_name == attr_name) {
66 			return i;
67 		}
68 	}
69 	return -1;
70 }
71 
72 /* return NULL if attr isn't found */
find_attr(const RBinDwarfDie * die,st32 attr_name)73 static RBinDwarfAttrValue *find_attr(const RBinDwarfDie *die, st32 attr_name) {
74 	st32 i;
75 	r_return_val_if_fail (die, NULL);
76 	for (i = 0; i < die->count; i++) {
77 		if (die->attr_values[i].attr_name == attr_name) {
78 			return &die->attr_values[i];
79 		}
80 	}
81 	return NULL;
82 }
83 
84 /**
85  * @brief Pasted from r_strbuf_*
86  *        Prepends string before a last occurence of character c
87  * 	      Used to replicate proper C declaration for function pointers
88  * @param sb
89  * @param s
90  * @param c
91  */
strbuf_rev_prepend_char(RStrBuf * sb,const char * s,int c)92 static bool strbuf_rev_prepend_char(RStrBuf *sb, const char *s, int c) {
93 	r_return_val_if_fail (sb && s, false);
94 	size_t l = strlen (s);
95 	// fast path if no chars to append
96 	if (l == 0) {
97 		return true;
98 	}
99 	size_t newlen = l + sb->len;
100 	char *ns = malloc (newlen + 1);
101 	bool ret = false;
102 	char *sb_str = sb->ptr ? sb->ptr : sb->buf;
103 	char *pivot = strrchr (sb_str, c);
104 	if (!pivot) {
105 		free (ns);
106 		return false;
107 	}
108 	size_t idx = pivot - sb_str;
109 	if (ns) {
110 		memcpy (ns, sb_str, idx);
111 		memcpy (ns + idx, s, l);
112 		memcpy (ns + idx + l, sb_str + idx, sb->len - idx);
113 		ns[newlen] = 0;
114 		ret = r_strbuf_set (sb, ns) != NULL;
115 		free (ns);
116 	}
117 	return ret;
118 }
119 /**
120  * @brief Pasted from r_strbuf_*
121  * 	      Appends string after a first occurence of character c
122  * 	      Used to replicate proper C declaration for function pointers
123  * @param sb
124  * @param s
125  * @param needle
126  */
strbuf_rev_append_char(RStrBuf * sb,const char * s,const char * needle)127 static bool strbuf_rev_append_char(RStrBuf *sb, const char *s, const char *needle) {
128 	r_return_val_if_fail (sb && s, false);
129 	size_t l = strlen (s);
130 	// fast path if no chars to append
131 	if (l == 0) {
132 		return true;
133 	}
134 	bool ret = false;
135 	char *sb_str = sb->ptr ? sb->ptr : sb->buf;
136 	char *pivot = strstr (sb_str, needle);
137 	if (!pivot) {
138 		return false;
139 	}
140 	pivot += strlen (needle);
141 	size_t idx = pivot - sb_str;
142 	size_t newlen = l + sb->len;
143 	char *ns = malloc (newlen + 1);
144 	if (ns) {
145 		memcpy (ns, sb_str, idx);
146 		memcpy (ns + idx, s, l);
147 		memcpy (ns + idx + l, sb_str + idx, sb->len - idx);
148 		ns[newlen] = 0;
149 		ret = r_strbuf_set (sb, ns) != NULL;
150 		free (ns);
151 	}
152 	return ret;
153 }
154 
/* Builds a placeholder name of the form "type_0x<offset>" via r_str_newf () */
static inline char *create_type_name_from_offset(ut64 offset) {
	char *generated = r_str_newf ("type_0x%" PFMT64x, offset);
	return generated;
}
158 
159 /**
160  * @brief Get the DIE name or create unique one from it's offset
161  *
162  * @param die
163  * @return char* DIEs name or NULL if error
164  */
get_die_name(const RBinDwarfDie * die)165 static char *get_die_name(const RBinDwarfDie *die) {
166 	char *name = NULL;
167 	st32 name_attr_idx = find_attr_idx (die, DW_AT_name);
168 
169 	if (name_attr_idx != -1 && die->attr_values[name_attr_idx].string.content) {
170 		name = strdup (die->attr_values[name_attr_idx].string.content);
171 	} else {
172 		name = create_type_name_from_offset (die->offset);
173 	}
174 	return name;
175 }
176 
177 /**
178  * @brief Get the DIE size in bits
179  *
180  * @param die
181  * @return ut64 size in bits or 0 if not found
182  */
get_die_size(const RBinDwarfDie * die)183 static ut64 get_die_size(const RBinDwarfDie *die) {
184 	ut64 size = 0;
185 	st32 byte_size_idx = find_attr_idx (die, DW_AT_byte_size);
186 
187 	if (byte_size_idx != -1) {
188 		size = die->attr_values[byte_size_idx].uconstant * CHAR_BIT;
189 	} else {
190 		st32 bit_size_idx = find_attr_idx (die, DW_AT_bit_size);
191 
192 		if (bit_size_idx != -1) {
193 			size = die->attr_values[bit_size_idx].uconstant;
194 		}
195 	}
196 	return size;
197 }
198 
199 /**
200  * @brief Parses array type entry signature into strbuf
201  *
202  * @param ctx
203  * @param idx index of the current entry
204  * @param strbuf strbuf to store the type into
205  * @return st32 -1 if error else 0
206  */
parse_array_type(Context * ctx,ut64 idx,RStrBuf * strbuf)207 static st32 parse_array_type(Context *ctx, ut64 idx, RStrBuf *strbuf) {
208 	const RBinDwarfDie *die = &ctx->all_dies[idx++];
209 
210 	if (die->has_children) {
211 		int child_depth = 1;
212 		size_t j;
213 		for (j = idx; child_depth > 0 && j < ctx->count; j++) {
214 			const RBinDwarfDie *child_die = &ctx->all_dies[j];
215 			// right now we skip non direct descendats of the structure
216 			// can be also DW_TAG_suprogram for class methods or tag for templates
217 			if (child_depth == 1 && child_die->tag == DW_TAG_subrange_type) {
218 				size_t i;
219 				for (i = 0; i < child_die->count; i++) {
220 					const RBinDwarfAttrValue *value = &child_die->attr_values[i];
221 					switch (value->attr_name) {
222 					case DW_AT_upper_bound:
223 					case DW_AT_count:
224 						r_strbuf_appendf (strbuf, "[%" PFMT64d "]", value->uconstant + 1);
225 						break;
226 
227 					default:
228 						break;
229 					}
230 				}
231 			}
232 			if (child_die->has_children) {
233 				child_depth++;
234 			}
235 			// sibling list is terminated by null entry
236 			if (child_die->abbrev_code == 0) {
237 				child_depth--;
238 			}
239 		}
240 	}
241 	return 0;
242 }
243 
244 /**
245  * @brief Recursively parses type entry of a certain offset into strbuf
246  *        saves type size into *size
247  *
248  * @param ctx
249  * @param offset offset of the type entry
250  * @param strbuf string to store the type into
251  * @param size ptr to size of a type to fill up (can be NULL if unwanted)
252  * @return st32 -1 if error else DW_TAG of the entry
253  *
254  * TODO make cache for type entries, one type is usually referenced
255  * multiple times which means it's parsed multiple times instead of once
256  */
parse_type(Context * ctx,const ut64 offset,RStrBuf * strbuf,ut64 * size)257 static st32 parse_type (Context *ctx, const ut64 offset, RStrBuf *strbuf, ut64 *size) {
258 	r_return_val_if_fail (strbuf, -1);
259 	RBinDwarfDie *die = ht_up_find (ctx->die_map, offset, NULL);
260 	if (!die) {
261 		return -1;
262 	}
263 
264 	st32 type_idx;
265 	st32 tag;
266 	char *name = NULL;
267 	// get size of first type DIE that has size
268 	if (size && *size == 0) {
269 		*size = get_die_size (die);
270 	}
271 	switch (die->tag) {
272 	// this should be recursive search for the type until you find base/user defined type
273 	case DW_TAG_pointer_type:
274 		type_idx = find_attr_idx (die, DW_AT_type);
275 		if (type_idx == -1) {
276 			r_strbuf_append (strbuf, "void");
277 			r_strbuf_append (strbuf, " *");
278 		} else {
279 			tag = parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
280 			if (tag == DW_TAG_subroutine_type) {
281 				strbuf_rev_prepend_char (strbuf, "(*)", '(');
282 			} else if (tag == DW_TAG_pointer_type) {
283 				if (!strbuf_rev_append_char (strbuf, "*", "(*")) {
284 					strbuf_rev_prepend_char (strbuf, "*", '*');
285 				}
286 			} else {
287 				r_strbuf_append (strbuf, " *");
288 			}
289 		}
290 		break;
291 	// We won't parse them as a complete type, because that will already be done
292 	// so just a name now
293 	case DW_TAG_typedef:
294 	case DW_TAG_base_type:
295 	case DW_TAG_structure_type:
296 	case DW_TAG_enumeration_type:
297 	case DW_TAG_union_type:
298 	case DW_TAG_class_type:
299 		name = get_die_name (die);
300 		r_strbuf_append (strbuf, name);
301 		free (name);
302 		break;
303 	case DW_TAG_subroutine_type:
304 		type_idx = find_attr_idx (die, DW_AT_type);
305 		if (type_idx == -1) {
306 			r_strbuf_append (strbuf, "void");
307 		} else {
308 			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
309 		}
310 		r_strbuf_append (strbuf, " (");
311 		if (die->has_children) { // has parameters
312 		}
313 		r_strbuf_append (strbuf, ")");
314 		break;
315 	case DW_TAG_array_type:
316 		type_idx = find_attr_idx (die, DW_AT_type);
317 		if (type_idx != -1) {
318 			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
319 		}
320 		parse_array_type (ctx, die - ctx->all_dies, strbuf);
321 		break;
322 	case DW_TAG_const_type:
323 		type_idx = find_attr_idx (die, DW_AT_type);
324 		if (type_idx != -1) {
325 			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
326 		}
327 		r_strbuf_append (strbuf, " const");
328 		break;
329 	case DW_TAG_volatile_type:
330 		type_idx = find_attr_idx (die, DW_AT_type);
331 		if (type_idx != -1) {
332 			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
333 		}
334 		r_strbuf_append (strbuf, " volatile");
335 		break;
336 	case DW_TAG_restrict_type:
337 		type_idx = find_attr_idx (die, DW_AT_type);
338 		if (type_idx != -1) {
339 			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
340 		}
341 		r_strbuf_append (strbuf, " restrict");
342 		break;
343 	case DW_TAG_rvalue_reference_type:
344 		type_idx = find_attr_idx (die, DW_AT_type);
345 		if (type_idx != -1) {
346 			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
347 		}
348 		r_strbuf_append (strbuf, " &&");
349 		break;
350 	case DW_TAG_reference_type:
351 		type_idx = find_attr_idx (die, DW_AT_type);
352 		if (type_idx != -1) {
353 			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
354 		}
355 		r_strbuf_append (strbuf, " &");
356 		break;
357 	default:
358 		break;
359 	}
360 	return (st32)die->tag;
361 }
362 
363 /**
364  * @brief Parses structured entry into *result RAnalStructMember
365  * http://www.dwarfstd.org/doc/DWARF4.pdf#page=102&zoom=100,0,0
366  *
367  * @param ctx
368  * @param idx index of the current entry
369  * @param result ptr to result member to fill up
370  * @return RAnalStructMember* ptr to parsed Member
371  */
parse_struct_member(Context * ctx,ut64 idx,RAnalStructMember * result)372 static RAnalStructMember *parse_struct_member (Context *ctx, ut64 idx, RAnalStructMember *result) {
373 	r_return_val_if_fail (result, NULL);
374 	const RBinDwarfDie *die = &ctx->all_dies[idx];
375 
376 	char *name = NULL;
377 	char *type = NULL;
378 	ut64 offset = 0;
379 	ut64 size = 0;
380 	RStrBuf strbuf;
381 	r_strbuf_init (&strbuf);
382 	size_t i;
383 	for (i = 0; i < die->count; i++) {
384 		RBinDwarfAttrValue *value = &die->attr_values[i];
385 		switch (die->attr_values[i].attr_name) {
386 		case DW_AT_name:
387 			name = get_die_name (die);
388 			if (!name) {
389 				goto cleanup;
390 			}
391 			break;
392 		case DW_AT_type:
393 			parse_type (ctx, value->reference, &strbuf, &size);
394 			type = r_strbuf_drain_nofree (&strbuf);
395 			if (!type || !*type) {
396 				goto cleanup;
397 			}
398 			break;
399 		case DW_AT_data_member_location:
400 			/*
401 				2 cases, 1.: If val is integer, it offset in bytes from
402 				the beginning of containing entity. If containing entity has
403 				a bit offset, member has that bit offset aswell
404 				2.: value is a location description
405 				http://www.dwarfstd.org/doc/DWARF4.pdf#page=39&zoom=100,0,0
406 			*/
407 			offset = value->uconstant;
408 			break;
409 		case DW_AT_accessibility: // private, public etc.
410 		case DW_AT_mutable: // flag is it is mutable
411 		case DW_AT_data_bit_offset:
412 			/*
413 				int that specifies the number of bits from beginning
414 				of containing entity to the beginning of the data member
415 			*/
416 			break;
417 		// If the size of a data member is not the same as the
418 		//  size of the type given for the data member
419 		case DW_AT_byte_size:
420 			size = value->uconstant * CHAR_BIT;
421 			break;
422 		case DW_AT_bit_size:
423 			size = value->uconstant;
424 			break;
425 		case DW_AT_containing_type:
426 		default:
427 			break;
428 		}
429 	}
430 
431 	result->name = name;
432 	result->type = type;
433 	result->offset = offset;
434 	result->size = size;
435 	return result;
436 cleanup:
437 	free (name);
438 	free (type);
439 	return NULL;
440 }
441 
442 /**
443  * @brief  Parses enum entry into *result RAnalEnumCase
444  * http://www.dwarfstd.org/doc/DWARF4.pdf#page=110&zoom=100,0,0
445  *
446  * @param ctx
447  * @param idx index of the current entry
448  * @param result ptr to result case to fill up
449  * @return RAnalEnumCase* Ptr to parsed enum case
450  */
parse_enumerator(Context * ctx,ut64 idx,RAnalEnumCase * result)451 static RAnalEnumCase *parse_enumerator(Context *ctx, ut64 idx, RAnalEnumCase *result) {
452 	const RBinDwarfDie *die = &ctx->all_dies[idx];
453 
454 	char *name = NULL;
455 	int val = 0;
456 	size_t i;
457 
458 	// Enumerator has DW_AT_name and DW_AT_const_value
459 	for (i = 0; i < die->count; i++) {
460 		RBinDwarfAttrValue *value = &die->attr_values[i];
461 		switch (die->attr_values[i].attr_name) {
462 		case DW_AT_name:
463 			name = get_die_name (die);
464 			if (!name) {
465 				goto cleanup;
466 			}
467 			break;
468 		case DW_AT_const_value:
469 			// ?? can be block, sdata, data, string w/e
470 			val = value->uconstant; // TODO solve the encoding, I don't know in which union member is it store
471 			break;
472 		default:
473 			break;
474 		}
475 	}
476 
477 	result->name = name;
478 	result->val = (int)val;
479 	return result;
480 cleanup:
481 	free (name);
482 	return NULL;
483 }
484 
485 /**
486  * @brief  Parses a structured entry (structs, classes, unions) into
487  *         RAnalBaseType and saves it using r_anal_save_base_type ()
488  *
489  * @param ctx
490  * @param idx index of the current entry
491  */
492 // http://www.dwarfstd.org/doc/DWARF4.pdf#page=102&zoom=100,0,0
parse_structure_type(Context * ctx,ut64 idx)493 static void parse_structure_type(Context *ctx, ut64 idx) {
494 	const RBinDwarfDie *die = &ctx->all_dies[idx];
495 
496 	RAnalBaseTypeKind kind;
497 	if (die->tag == DW_TAG_union_type) {
498 		kind = R_ANAL_BASE_TYPE_KIND_UNION;
499 	} else {
500 		kind = R_ANAL_BASE_TYPE_KIND_STRUCT;
501 	}
502 
503 	RAnalBaseType *base_type = r_anal_base_type_new (kind);
504 	if (!base_type) {
505 		return;
506 	}
507 
508 	base_type->name = get_die_name (die);
509 	if (!base_type->name) {
510 		goto cleanup;
511 	}
512 
513 	// if it is definition of previous declaration (TODO Fix, big ugly hotfix addition)
514 	st32 spec_attr_idx = find_attr_idx (die, DW_AT_specification);
515 	if (spec_attr_idx != -1) {
516 		RBinDwarfDie *decl_die = ht_up_find (ctx->die_map, die->attr_values[spec_attr_idx].reference, NULL);
517 		if (!decl_die) {
518 			goto cleanup;
519 		}
520 		st32 name_attr_idx = find_attr_idx (decl_die, DW_AT_name);
521 		if (name_attr_idx != -1) {
522 			free (base_type->name);
523 			base_type->name = get_die_name (decl_die);
524 		}
525 	}
526 
527 	base_type->size = get_die_size (die);
528 
529 	RAnalStructMember member = { 0 };
530 	// Parse out all members, can this in someway be extracted to a function?
531 	if (die->has_children) {
532 		int child_depth = 1; // Direct children of the node
533 		size_t j;
534 		idx++; // Move to the first children node
535 		for (j = idx; child_depth > 0 && j < ctx->count; j++) {
536 			const RBinDwarfDie *child_die = &ctx->all_dies[j];
537 			// we take only direct descendats of the structure
538 			// can be also DW_TAG_suprogram for class methods or tag for templates
539 			if (child_depth == 1 && child_die->tag == DW_TAG_member) {
540 				RAnalStructMember *result = parse_struct_member (ctx, j, &member);
541 				if (!result) {
542 					goto cleanup;
543 				} else {
544 					void *element = r_vector_push (&base_type->struct_data.members, &member);
545 					if (!element) {
546 						goto cleanup;
547 					}
548 				}
549 			}
550 			if (child_die->has_children) {
551 				child_depth++;
552 			}
553 			if (child_die->abbrev_code == 0) { // siblings terminator
554 				child_depth--;
555 			}
556 		}
557 	}
558 	r_anal_save_base_type (ctx->anal, base_type);
559 cleanup:
560 	r_anal_base_type_free (base_type);
561 }
562 
563 /**
564  * @brief Parses a enum entry into RAnalBaseType and saves it
565  *        int Sdb using r_anal_save_base_type ()
566  *
567  * @param ctx
568  * @param idx index of the current entry
569  */
parse_enum_type(Context * ctx,ut64 idx)570 static void parse_enum_type(Context *ctx, ut64 idx) {
571 	const RBinDwarfDie *die = &ctx->all_dies[idx];
572 
573 	RAnalBaseType *base_type = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_ENUM);
574 	if (!base_type) {
575 		return;
576 	}
577 
578 	base_type->name = get_die_name (die);
579 	if (!base_type->name) {
580 		goto cleanup;
581 	}
582 	base_type->size = get_die_size (die);
583 
584 	st32 type_attr_idx = find_attr_idx (die, DW_AT_type);
585 	if (type_attr_idx != -1) {
586 		RStrBuf strbuf;
587 		r_strbuf_init (&strbuf);
588 		parse_type (ctx, die->attr_values[type_attr_idx].reference, &strbuf, &base_type->size);
589 		base_type->type = r_strbuf_drain_nofree (&strbuf);
590 	}
591 
592 	RAnalEnumCase cas;
593 	if (die->has_children) {
594 		int child_depth = 1; // Direct children of the node
595 		size_t j;
596 		idx++; // Move to the first children node
597 		for (j = idx; child_depth > 0 && j < ctx->count; j++) {
598 			const RBinDwarfDie *child_die = &ctx->all_dies[j];
599 			// we take only direct descendats of the structure
600 			if (child_depth == 1 && child_die->tag == DW_TAG_enumerator) {
601 				RAnalEnumCase *result = parse_enumerator (ctx, j, &cas);
602 				if (!result) {
603 					goto cleanup;
604 				} else {
605 					void *element = r_vector_push (&base_type->enum_data.cases, &cas);
606 					if (!element) {
607 						enum_type_case_free (result, NULL);
608 						goto cleanup;
609 					}
610 				}
611 			}
612 			if (child_die->has_children) {
613 				child_depth++;
614 			}
615 			// sibling list is terminated by null entry
616 			if (child_die->abbrev_code == 0) {
617 				child_depth--;
618 			}
619 		}
620 	}
621 	r_anal_save_base_type (ctx->anal, base_type);
622 cleanup:
623 	r_anal_base_type_free (base_type);
624 }
625 
626 /**
627  * @brief Parses a typedef entry into RAnalBaseType and saves it
628  *        using r_anal_save_base_type ()
629  *
630  * http://www.dwarfstd.org/doc/DWARF4.pdf#page=96&zoom=100,0,0
631  *
632  * @param ctx
633  * @param idx index of the current entry
634  */
parse_typedef(Context * ctx,ut64 idx)635 static void parse_typedef(Context *ctx, ut64 idx) {
636 	const RBinDwarfDie *die = &ctx->all_dies[idx];
637 
638 	char *name = NULL;
639 	char *type = NULL;
640 	ut64 size = 0;
641 	RStrBuf strbuf;
642 	r_strbuf_init (&strbuf);
643 	size_t i;
644 
645 	for (i = 0; i < die->count; i++) {
646 		RBinDwarfAttrValue *value = &die->attr_values[i];
647 		switch (die->attr_values[i].attr_name) {
648 		case DW_AT_name:
649 			name = get_die_name (die);
650 			if (!name) {
651 				goto cleanup;
652 			}
653 			break;
654 		case DW_AT_type:
655 			parse_type (ctx, value->reference, &strbuf, &size);
656 			type = r_strbuf_drain_nofree (&strbuf);
657 			if (!type) {
658 				goto cleanup;
659 			}
660 			break;
661 		default:
662 			break;
663 		}
664 	}
665 	if (!name) { // type has to have a name for now
666 		goto cleanup;
667 	}
668 	RAnalBaseType *base_type = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_TYPEDEF);
669 	if (!base_type) {
670 		goto cleanup;
671 	}
672 	base_type->name = name;
673 	base_type->type = type;
674 	r_anal_save_base_type (ctx->anal, base_type);
675 	r_anal_base_type_free (base_type);
676 	r_strbuf_fini (&strbuf);
677 	return;
678 cleanup:
679 	free (name);
680 	free (type);
681 	r_strbuf_fini (&strbuf);
682 }
683 
/**
 * @brief Parses a base type entry into an atomic RAnalBaseType and
 *        saves it using r_anal_save_base_type ()
 *
 * Entries without DW_AT_name are skipped.
 *
 * @param ctx
 * @param idx index of the current entry
 */
static void parse_atomic_type(Context *ctx, ut64 idx) {
	const RBinDwarfDie *die = &ctx->all_dies[idx];

	char *name = NULL;
	ut64 size = 0;
	size_t i;
	// TODO support endianness and encoding in future?
	for (i = 0; i < die->count; i++) {
		RBinDwarfAttrValue *value = &die->attr_values[i];
		switch (value->attr_name) {
		case DW_AT_name:
			// a malformed DIE may repeat the attribute, don't leak the previous copy
			free (name);
			if (!value->string.content) {
				name = create_type_name_from_offset (die->offset);
			} else {
				name = strdup (value->string.content);
			}
			if (!name) {
				return;
			}
			break;
		case DW_AT_byte_size:
			size = value->uconstant * CHAR_BIT;
			break;
		case DW_AT_bit_size:
			size = value->uconstant;
			break;
		case DW_AT_encoding:
		default:
			break;
		}
	}
	if (!name) { // type has to have a name for now
		return;
	}
	RAnalBaseType *base_type = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_ATOMIC);
	if (!base_type) {
		// name wasn't handed over to anything yet, release it here
		free (name);
		return;
	}
	// name is handed to base_type and released along with it
	base_type->name = name;
	base_type->size = size;
	r_anal_save_base_type (ctx->anal, base_type);
	r_anal_base_type_free (base_type);
}
727 
get_specification_die_name(const RBinDwarfDie * die)728 static const char *get_specification_die_name(const RBinDwarfDie *die) {
729 	st32 linkage_name_attr_idx = find_attr_idx (die, DW_AT_linkage_name);
730 	if (linkage_name_attr_idx != -1 && die->attr_values[linkage_name_attr_idx].string.content) {
731 		return die->attr_values[linkage_name_attr_idx].string.content;
732 	}
733 	st32 name_attr_idx = find_attr_idx (die, DW_AT_name);
734 	if (name_attr_idx != -1 && die->attr_values[name_attr_idx].string.content) {
735 		return die->attr_values[name_attr_idx].string.content;
736 	}
737 	return NULL;
738 }
739 
/**
 * @brief Renders the DW_AT_type of a specification DIE into ret_type,
 *        ret_type is left untouched when the DIE has no type attribute
 *
 * @param ctx
 * @param die
 * @param ret_type strbuf receiving the textual type
 */
static void get_spec_die_type(Context *ctx, RBinDwarfDie *die, RStrBuf *ret_type) {
	const st32 type_idx = find_attr_idx (die, DW_AT_type);
	if (type_idx == -1) {
		return;
	}
	ut64 ignored_size = 0; // parse_type reports the size here, it isn't used
	parse_type (ctx, die->attr_values[type_idx].reference, ret_type, &ignored_size);
}
747 
/**
 * @brief Tells whether the linkage name should win over the plain name
 *
 * For some languages linkage name is more informative like C++,
 * but for Rust it's rubbish and the normal name is fine
 *
 * @param lang language name, NULL is treated like any other
 *             unlisted language
 * @return false for "rust" and "ada", true otherwise
 */
static bool prefer_linkage_name(char *lang) {
	if (!lang) {
		// unknown language, stick to the default instead of crashing in strcmp
		return true;
	}
	if (!strcmp (lang, "rust")) {
		return false;
	}
	if (!strcmp (lang, "ada")) {
		return false;
	}
	return true;
}
758 
/**
 * @brief Collects name and type of the DIE an abstract origin refers to
 *
 * The linkage name is used only for languages where
 * prefer_linkage_name () says so, and then it wins over DW_AT_name
 * regardless of the order of the attributes.
 *
 * @param ctx
 * @param offset offset of the referenced DIE
 * @param type strbuf the textual type gets appended to
 * @param name receives a pointer to a string stored in the DIE (not a copy),
 *             left untouched when the DIE isn't found or has no name attribute
 */
static void parse_abstract_origin(Context *ctx, ut64 offset, RStrBuf *type, const char **name) {
	RBinDwarfDie *die = ht_up_find (ctx->die_map, offset, NULL);
	if (die) {
		size_t i;
		ut64 size = 0;
		bool has_linkage_name = false;
		bool get_linkage_name = prefer_linkage_name (ctx->lang);
		for (i = 0; i < die->count; i++) {
			const RBinDwarfAttrValue *val = &die->attr_values[i];
			switch (val->attr_name) {
			case DW_AT_name:
				if (!get_linkage_name || !has_linkage_name) {
					*name = val->string.content;
				}
				break;
			case DW_AT_linkage_name:
			case DW_AT_MIPS_linkage_name:
				// must not clobber the plain name when the language doesn't want linkage names
				if (get_linkage_name) {
					*name = val->string.content;
					has_linkage_name = true;
				}
				break;
			case DW_AT_type:
				parse_type (ctx, val->reference, type, &size);
				break;
			default:
				break;
			}
		}
	}
}
788 
789 /* x86_64 https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf */
map_dwarf_reg_to_x86_64_reg(ut64 reg_num,VariableLocationKind * kind)790 static const char *map_dwarf_reg_to_x86_64_reg(ut64 reg_num, VariableLocationKind *kind) {
791 	*kind = LOCATION_REGISTER;
792 	switch (reg_num) {
793 		case 0: return "rax";
794 		case 1: return "rdx";
795 		case 2: return "rcx";
796 		case 3: return "rbx";
797 		case 4: return "rsi";
798 		case 5: return "rdi";
799 		case 6:
800 			*kind = LOCATION_BP;
801 			return "rbp";
802 		case 7:
803 			*kind = LOCATION_SP;
804 			return "rsp";
805 		case 8: return "r8";
806 		case 9: return "r9";
807 		case 10: return "r10";
808 		case 11: return "r11";
809 		case 12: return "r12";
810 		case 13: return "r13";
811 		case 14: return "r14";
812 		case 15: return "r15";
813 		case 17: return "xmm0";
814 		case 18: return "xmm1";
815 		case 19: return "xmm2";
816 		case 20: return "xmm3";
817 		case 21: return "xmm4";
818 		case 22: return "xmm5";
819 		case 23: return "xmm6";
820 		case 24: return "xmm7";
821 		default:
822 			*kind = LOCATION_UNKNOWN;
823 			return "unsupported_reg";
824 	}
825 }
826 
827 /* x86 https://01.org/sites/default/files/file_attach/intel386-psabi-1.0.pdf */
map_dwarf_reg_to_x86_reg(ut64 reg_num,VariableLocationKind * kind)828 static const char *map_dwarf_reg_to_x86_reg(ut64 reg_num, VariableLocationKind *kind) {
829 	*kind = LOCATION_REGISTER;
830 	switch (reg_num) {
831 		case 0: return "eax";
832 		case 1: return "edx";
833 		case 2: return "ecx";
834 		case 3: return "ebx";
835 		case 4:
836 			*kind = LOCATION_SP;
837 			return "esp";
838 		case 5:
839 			*kind = LOCATION_BP;
840 			return "ebp";
841 		case 6: return "esi";
842 		case 7: return "edi";
843 		case 21: return "xmm0";
844 		case 22: return "xmm1";
845 		case 23: return "xmm2";
846 		case 24: return "xmm3";
847 		case 25: return "xmm4";
848 		case 26: return "xmm5";
849 		case 27: return "xmm6";
850 		case 28: return "xmm7";
851 		default:
852 			r_warn_if_reached ();
853 			*kind = LOCATION_UNKNOWN;
854 			return "unsupported_reg";
855 	}
856 }
857 
858 /* https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html#DW-REG */
map_dwarf_reg_to_ppc64_reg(ut64 reg_num,VariableLocationKind * kind)859 static const char *map_dwarf_reg_to_ppc64_reg(ut64 reg_num, VariableLocationKind *kind) {
860 	*kind = LOCATION_REGISTER;
861 	switch (reg_num) {
862 		case 0: return "r0";
863 		case 1:
864 			*kind = LOCATION_SP;
865 			return "r1";
866 		case 2: return "r2";
867 		case 3: return "r3";
868 		case 4: return "r4";
869 		case 5: return "r5";
870 		case 6: return "r6";
871 		case 7: return "r7";
872 		case 8: return "r8";
873 		case 9: return "r9";
874 		case 10: return "r10";
875 		case 11: return "r11";
876 		case 12: return "r12";
877 		case 13: return "r13";
878 		case 14: return "r14";
879 		case 15: return "r15";
880 		case 16: return "r16";
881 		case 17: return "r17";
882 		case 18: return "r18";
883 		case 19: return "r19";
884 		case 20: return "r20";
885 		case 21: return "r21";
886 		case 22: return "r22";
887 		case 23: return "r23";
888 		case 24: return "r24";
889 		case 25: return "r25";
890 		case 26: return "r26";
891 		case 27: return "r27";
892 		case 28: return "r28";
893 		case 29: return "r29";
894 		case 30: return "r30";
895 		case 31: return "r31";
896 		default:
897 			r_warn_if_reached ();
898 			*kind = LOCATION_UNKNOWN;
899 			return "unsupported_reg";
900 	}
901 }
902 
903 /* returns string literal register name!
904    TODO add more arches                 */
get_dwarf_reg_name(char * arch,int reg_num,VariableLocationKind * kind,int bits)905 static const char *get_dwarf_reg_name(char *arch, int reg_num, VariableLocationKind *kind, int bits) {
906 	if (!strcmp (arch, "x86")) {
907 		if (bits == 64) {
908 			return map_dwarf_reg_to_x86_64_reg (reg_num, kind);
909 		} else {
910 			return map_dwarf_reg_to_x86_reg (reg_num, kind);
911 		}
912 	} else if (!strcmp (arch, "ppc")) {
913 		if (bits == 64) {
914 			return map_dwarf_reg_to_ppc64_reg (reg_num, kind);
915 		}
916 	}
917 	*kind = LOCATION_UNKNOWN;
918 	return "unsupported_reg";
919 }
920 
find_largest_loc_range(RList * loc_list)921 static RBinDwarfLocRange *find_largest_loc_range (RList *loc_list) {
922 	RBinDwarfLocRange *largest = NULL;
923 	ut64 max_range_size = 0;
924 	RListIter *iter;
925 	RBinDwarfLocRange *range;
926 	r_list_foreach (loc_list, iter, range) {
927 		ut64 diff = range->end - range->start;
928 		if (diff > max_range_size) {
929 			max_range_size = diff ;
930 			largest = range;
931 		}
932 	}
933 	return largest;
934 }
935 
936 /* TODO move a lot of the parsing here into dwarf.c and do only processing here */
parse_dwarf_location(Context * ctx,const RBinDwarfAttrValue * loc,const RBinDwarfAttrValue * frame_base)937 static VariableLocation *parse_dwarf_location (Context *ctx, const RBinDwarfAttrValue *loc, const RBinDwarfAttrValue *frame_base) {
938 	/* reg5 - val is in register 5
939 	fbreg <leb> - offset from frame base
940 	regx <leb> - contents is in register X
941 	addr <addr> - contents is in at addr
942 	bregXX <leb> - contents is at offset from specified register
943 	- we now support 3 options: SP, BP and register based arguments */
944 
945 	/* Loclist offset is usually CONSTANT or REFERENCE at older DWARF versions, new one has LocListPtr for that */
946 	if (loc->kind != DW_AT_KIND_BLOCK && loc->kind != DW_AT_KIND_LOCLISTPTR && loc->kind != DW_AT_KIND_REFERENCE && loc->kind != DW_AT_KIND_CONSTANT) {
947 		return NULL;
948 	}
949 	RBinDwarfBlock block;
950 	if (loc->kind == DW_AT_KIND_LOCLISTPTR || loc->kind == DW_AT_KIND_REFERENCE || loc->kind == DW_AT_KIND_CONSTANT) {
951 		ut64 offset = loc->reference;
952 		RBinDwarfLocList *range_list = ht_up_find (ctx->locations, offset, NULL);
953 		if (!range_list) { /* for some reason offset isn't there, wrong parsing or malformed dwarf */
954 			return NULL;
955 		}
956 		/* use the largest range as a variable */
957 		RBinDwarfLocRange *range = find_largest_loc_range (range_list->list);
958 		if (!range) {
959 			return NULL;
960 		}
961 		/* Very rough and sloppy, refactor this hacked up stuff */
962 		block = *range->expression;
963 		// range->expression... etc
964 	} else {
965 		block = loc->block;
966 	}
967 	VariableLocationKind kind = LOCATION_UNKNOWN;
968 	st64 offset = 0;
969 	ut64 address = 0;
970 	ut64 reg_num = -1;
971 	const char *reg_name = NULL; /* literal */
972 	size_t i;
973 	for (i = 0; i < block.length; i++) {
974 		switch (block.data[i]) {
975 		case DW_OP_fbreg: {
976 		/* TODO sometimes CFA is referenced, but we don't parse that yet
977 		   just an offset involving framebase of a function*/
978 			if (i == block.length - 1) {
979 				return NULL;
980 			}
981 			const ut8 *dump = &block.data[++i];
982 			offset = r_sleb128 (&dump, &block.data[loc->block.length]);
983 			if (frame_base) {
984 				/* recursive parsing, but frame_base should be only one, but someone
985 				   could make malicious resource exhaustion attack, so a depth counter might be cool? */
986 				VariableLocation *location = parse_dwarf_location (ctx, frame_base, NULL);
987 				if (location) {
988 					location->offset += offset;
989 					return location;
990 				}
991 				return NULL;
992 			} else {
993 				/* Might happen if frame_base has a frame_base reference? I don't think it can tho */
994 				return NULL;
995 			}
996 			break;
997 		}
998 		case DW_OP_reg0:
999 		case DW_OP_reg1:
1000 		case DW_OP_reg2:
1001 		case DW_OP_reg3:
1002 		case DW_OP_reg4:
1003 		case DW_OP_reg5:
1004 		case DW_OP_reg6:
1005 		case DW_OP_reg7:
1006 		case DW_OP_reg8:
1007 		case DW_OP_reg9:
1008 		case DW_OP_reg10:
1009 		case DW_OP_reg11:
1010 		case DW_OP_reg12:
1011 		case DW_OP_reg13:
1012 		case DW_OP_reg14:
1013 		case DW_OP_reg15:
1014 		case DW_OP_reg16:
1015 		case DW_OP_reg17:
1016 		case DW_OP_reg18:
1017 		case DW_OP_reg19:
1018 		case DW_OP_reg20:
1019 		case DW_OP_reg21:
1020 		case DW_OP_reg22:
1021 		case DW_OP_reg23:
1022 		case DW_OP_reg24:
1023 		case DW_OP_reg25:
1024 		case DW_OP_reg26:
1025 		case DW_OP_reg27:
1026 		case DW_OP_reg28:
1027 		case DW_OP_reg29:
1028 		case DW_OP_reg30:
1029 		case DW_OP_reg31: {
1030 			/* Will mostly be used for SP based arguments */
1031 			/* TODO I need to find binaries that uses this so I can test it out*/
1032 			reg_num = block.data[i] - DW_OP_reg0; // get the reg number
1033 			reg_name = get_dwarf_reg_name (ctx->anal->cpu, reg_num, &kind, ctx->anal->bits);
1034 			break;
1035 		}
1036 		case DW_OP_breg0:
1037 		case DW_OP_breg1:
1038 		case DW_OP_breg2:
1039 		case DW_OP_breg3:
1040 		case DW_OP_breg4:
1041 		case DW_OP_breg5:
1042 		case DW_OP_breg6:
1043 		case DW_OP_breg7:
1044 		case DW_OP_breg8:
1045 		case DW_OP_breg9:
1046 		case DW_OP_breg10:
1047 		case DW_OP_breg11:
1048 		case DW_OP_breg12:
1049 		case DW_OP_breg13:
1050 		case DW_OP_breg14:
1051 		case DW_OP_breg15:
1052 		case DW_OP_breg16:
1053 		case DW_OP_breg17:
1054 		case DW_OP_breg18:
1055 		case DW_OP_breg19:
1056 		case DW_OP_breg20:
1057 		case DW_OP_breg21:
1058 		case DW_OP_breg22:
1059 		case DW_OP_breg23:
1060 		case DW_OP_breg24:
1061 		case DW_OP_breg25:
1062 		case DW_OP_breg26:
1063 		case DW_OP_breg27:
1064 		case DW_OP_breg28:
1065 		case DW_OP_breg29:
1066 		case DW_OP_breg30:
1067 		case DW_OP_breg31: {
1068 			if (i == block.length - 1) {
1069 				return NULL;
1070 			}
1071 			/* The single operand of the DW_OP_bregn operations provides
1072 			signed LEB128 offset from the specified register.  */
1073 			reg_num = block.data[i] - DW_OP_breg0; // get the reg number
1074 			const ut8 *buffer = &block.data[++i];
1075 			offset = r_sleb128 (&buffer, &block.data[block.length]);
1076 			/* TODO do a proper expression parsing, move by the amount of bytes sleb reads */
1077 			i += buffer - &block.data[0];
1078 			reg_name = get_dwarf_reg_name (ctx->anal->cpu, reg_num, &kind, ctx->anal->bits);
1079 			break;
1080 		}
1081 		case DW_OP_bregx: {
1082 			if (i == block.length - 1) {
1083 				return NULL;
1084 			}
1085 			/* 2 operands, reg_number, offset*/
1086 			/* I need to find binaries that uses this so I can test it out*/
1087 			const ut8 *buffer = &block.data[++i];
1088 			const ut8 *buf_end = &block.data[block.length];
1089 			buffer = r_uleb128 (buffer, buf_end - buffer, &reg_num, NULL);
1090 			if (buffer == buf_end) {
1091 				return NULL;
1092 			}
1093 			offset = r_sleb128 (&buffer, buf_end);
1094 			reg_name = get_dwarf_reg_name (ctx->anal->cpu, reg_num, &kind, ctx->anal->bits);
1095 			break;
1096 		}
1097 		case DW_OP_addr: {
1098 			/* The DW_OP_addr operation has a single operand that encodes a machine address and whose
1099 			size is the size of an address on the target machine.  */
1100 			const int addr_size = ctx->anal->bits / 8;
1101 			const ut8 *dump = &block.data[++i];
1102 			/* malformed, not enough bytes to represent address */
1103 			if (block.length - i < addr_size) {
1104 				return NULL;
1105 			}
1106 			switch (addr_size) {
1107 			case 1:
1108 				address = r_read_ble8 (dump);
1109 				break;
1110 			case 2:
1111 				address = r_read_ble16 (dump, ctx->anal->big_endian);
1112 				break;
1113 			case 4:
1114 				address = r_read_ble32 (dump, ctx->anal->big_endian);
1115 				break;
1116 			case 8:
1117 				address = r_read_ble64 (dump, ctx->anal->big_endian);
1118 				break;
1119 			default:
1120 				r_warn_if_reached (); /* weird addr_size */
1121 				return NULL;
1122 			}
1123 			kind = LOCATION_GLOBAL; // address
1124 			break;
1125 		}
1126 		case DW_OP_call_frame_cfa: {
1127 			// REMOVE XXX
1128 			kind = LOCATION_BP;
1129 			offset += 16;
1130 			break;
1131 		}
1132 		default:
1133 			break;
1134 		}
1135 	}
1136 	if (kind == LOCATION_UNKNOWN) {
1137 		return NULL;
1138 	}
1139 	VariableLocation *location = R_NEW0 (VariableLocation);
1140 	if (location) {
1141 		location->reg_name = reg_name;
1142 		location->reg_num = reg_num;
1143 		location->kind = kind;
1144 		location->offset = offset;
1145 		location->address = address;
1146 	}
1147 	return location;
1148 }
1149 
parse_function_args_and_vars(Context * ctx,ut64 idx,RStrBuf * args,RList * variables)1150 static st32 parse_function_args_and_vars(Context *ctx, ut64 idx, RStrBuf *args, RList/*<Variable*>*/ *variables) {
1151 	const RBinDwarfDie *die = &ctx->all_dies[idx++];
1152 
1153 	if (die->has_children) {
1154 		int child_depth = 1;
1155 
1156 		bool get_linkage_name = prefer_linkage_name (ctx->lang);
1157 		bool has_linkage_name = false;
1158 		int argNumber = 1;
1159 		size_t j;
1160 		for (j = idx; child_depth > 0 && j < ctx->count; j++) {
1161 			const RBinDwarfDie *child_die = &ctx->all_dies[j];
1162 			RStrBuf type;
1163 			r_strbuf_init (&type);
1164 			const char *name = NULL;
1165 			if (child_die->tag == DW_TAG_formal_parameter || child_die->tag == DW_TAG_variable) {
1166 				Variable *var = R_NEW0 (Variable);
1167 				size_t i;
1168 				for (i = 0; i < child_die->count; i++) {
1169 					const RBinDwarfAttrValue *val = &child_die->attr_values[i];
1170 					switch (val->attr_name) {
1171 					case DW_AT_name:
1172 						if (!get_linkage_name || !has_linkage_name) {
1173 							name = val->string.content;
1174 						}
1175 						break;
1176 					case DW_AT_linkage_name:
1177 					case DW_AT_MIPS_linkage_name:
1178 						name = val->string.content;
1179 						has_linkage_name = true;
1180 						break;
1181 					case DW_AT_type:
1182 						parse_type (ctx, val->reference, &type, NULL);
1183 						break;
1184 					// abstract origin is supposed to have omitted information
1185 					case DW_AT_abstract_origin:
1186 						parse_abstract_origin (ctx, val->reference, &type, &name);
1187 						break;
1188 					case DW_AT_location:
1189 						var->location = parse_dwarf_location (ctx, val, find_attr (die, DW_AT_frame_base));
1190 						break;
1191 					default:
1192 						break;
1193 					}
1194 				}
1195 				if (child_die->tag == DW_TAG_formal_parameter && child_depth == 1) {
1196 					/* arguments sometimes have only type, create generic argX */
1197 					if (type.len) {
1198 						if (!name) {
1199 							var->name = r_str_newf ("arg%d", argNumber);
1200 						} else {
1201 							var->name = strdup (name);
1202 						}
1203 						r_strbuf_appendf (args, "%s %s,", r_strbuf_get (&type), var->name);
1204 						var->type = strdup (r_strbuf_get (&type));
1205 						r_list_append (variables, var);
1206 					} else {
1207 						variable_free (var);
1208 					}
1209 					argNumber++;
1210 				} else { /* DW_TAG_variable */
1211 					if (name && type.len) {
1212 						var->name = strdup (name);
1213 						var->type = strdup (r_strbuf_get (&type));
1214 						r_list_append (variables, var);
1215 					} else {
1216 						variable_free (var);
1217 					}
1218 					r_strbuf_fini (&type);
1219 				}
1220 			} else if (child_depth == 1 && child_die->tag == DW_TAG_unspecified_parameters) {
1221 				r_strbuf_appendf (args, "va_args ...,");
1222 			}
1223 			if (child_die->has_children) {
1224 				child_depth++;
1225 			}
1226 			if (child_die->abbrev_code == 0) { /* sibling list is terminated by null entry */
1227 				child_depth--;
1228 			}
1229 			r_strbuf_fini (&type);
1230 		}
1231 		if (args->len > 0) {
1232 			r_strbuf_slice (args, 0, args->len - 1);
1233 		}
1234 	}
1235 	return 0;
1236 }
1237 
/**
 * @brief Write one parsed function and its located variables into the Sdb
 *
 * Keys written (sname is the sanitized function name):
 *   sname                  = "fcn"
 *   fcn.sname.addr         = "0x<addr>"
 *   fcn.sname.name         = unsanitized name
 *   fcn.sname.sig          = signature
 *   fcn.sname.var.<name>   = "<storage>,<offset|address|register>,<type>"
 *   fcn.sname.vars         = comma separated names of the stored variables
 * Variables without a location or with an unknown location kind are skipped.
 *
 * @param dwarf_fcn
 * @param variables
 * @param sdb
 */
static void sdb_save_dwarf_function(Function *dwarf_fcn, RList/*<Variable*>*/ *variables, Sdb *sdb) {
	char *sname = r_str_sanitize_sdb_key (dwarf_fcn->name);
	char *k;
	char *v;

	sdb_set (sdb, sname, "fcn", 0);

	k = r_str_newf ("fcn.%s.addr", sname);
	v = r_str_newf ("0x%" PFMT64x "", dwarf_fcn->addr);
	sdb_set (sdb, k, v, 0);
	free (k);
	free (v);

	/* so we can have name without sanitization */
	k = r_str_newf ("fcn.%s.name", sname);
	v = r_str_newf ("%s", dwarf_fcn->name);
	sdb_set (sdb, k, v, 0);
	free (k);
	free (v);

	k = r_str_newf ("fcn.%s.sig", sname);
	sdb_set (sdb, k, dwarf_fcn->signature, 0);
	free (k);

	RStrBuf names;
	r_strbuf_init (&names);
	RListIter *it;
	Variable *cur;
	r_list_foreach (variables, it, cur) {
		const VariableLocation *where = cur->location;
		if (!where) {
			/* NULL location probably means optimized out, maybe put a comment there */
			continue;
		}
		/* first build the value, its layout depends on the storage kind */
		bool storable = true;
		char *record = NULL;
		switch (where->kind) {
		case LOCATION_BP:
			record = r_str_newf ("%s,%" PFMT64d ",%s", "b", where->offset, cur->type);
			break;
		case LOCATION_SP:
			record = r_str_newf ("%s,%" PFMT64d ",%s", "s", where->offset, cur->type);
			break;
		case LOCATION_GLOBAL:
			record = r_str_newf ("%s,%" PFMT64u ",%s", "g", where->address, cur->type);
			break;
		case LOCATION_REGISTER:
			record = r_str_newf ("%s,%s,%s", "r", where->reg_name, cur->type);
			break;
		default:
			/* else location is unknown (optimized out), skip the var */
			storable = false;
			break;
		}
		if (storable) {
			r_strbuf_appendf (&names, "%s,", cur->name);
			char *var_key = r_str_newf ("fcn.%s.var.%s", sname, cur->name);
			sdb_set (sdb, var_key, record, 0);
			free (var_key);
		}
		free (record);
	}
	if (names.len > 0) { /* remove the extra , */
		r_strbuf_slice (&names, 0, names.len - 1);
	}
	k = r_str_newf ("fcn.%s.vars", sname);
	v = r_str_newf ("%s", r_strbuf_get (&names));
	sdb_set (sdb, k, v, 0);
	free (k);
	free (v);
	r_strbuf_fini (&names);
	free (sname);
}
1326 
1327 /**
1328  * @brief Parse function,it's arguments, variables and
1329  *        save the information into the Sdb
1330  *
1331  * @param ctx
1332  * @param idx Current entry index
1333  */
parse_function(Context * ctx,ut64 idx)1334 static void parse_function(Context *ctx, ut64 idx) {
1335 	const RBinDwarfDie *die = &ctx->all_dies[idx];
1336 
1337 	Function fcn = { 0 };
1338 	bool has_linkage_name = false;
1339 	bool get_linkage_name = prefer_linkage_name (ctx->lang);
1340 	RStrBuf ret_type;
1341 	r_strbuf_init (&ret_type);
1342 	if (find_attr_idx (die, DW_AT_declaration) != -1) {
1343 		return; /* just declaration skip */
1344 	}
1345 	size_t i;
1346 	/* For rust binaries prefer regular name not linkage TODO */
1347 	for (i = 0; i < die->count; i++) {
1348 		RBinDwarfAttrValue *val = &die->attr_values[i];
1349 		switch (die->attr_values[i].attr_name) {
1350 		case DW_AT_name:
1351 			if (!get_linkage_name || !has_linkage_name) {
1352 				fcn.name = val->string.content;
1353 			}
1354 			break;
1355 		case DW_AT_linkage_name:
1356 		case DW_AT_MIPS_linkage_name:
1357 			fcn.name = val->string.content;
1358 			has_linkage_name = true;
1359 			break;
1360 		case DW_AT_low_pc:
1361 		case DW_AT_entry_pc:
1362 			fcn.addr = val->address;
1363 			break;
1364 		case DW_AT_specification: /* reference to declaration DIE with more info */
1365 		{
1366 			RBinDwarfDie *spec_die = ht_up_find (ctx->die_map, val->reference, NULL);
1367 			if (spec_die) {
1368 				fcn.name = get_specification_die_name (spec_die); /* I assume that if specification has a name, this DIE hasn't */
1369 				get_spec_die_type (ctx, spec_die, &ret_type);
1370 			}
1371 			break;
1372 		}
1373 		case DW_AT_type:
1374 			parse_type (ctx, val->reference, &ret_type, NULL);
1375 			break;
1376 		case DW_AT_virtuality:
1377 			fcn.is_method = true; /* method specific attr */
1378 			fcn.is_virtual = true;
1379 			break;
1380 		case DW_AT_object_pointer:
1381 			fcn.is_method = true;
1382 			break;
1383 		case DW_AT_vtable_elem_location:
1384 			fcn.is_method = true;
1385 			fcn.vtable_addr = 0; /* TODO we might use this information */
1386 			break;
1387 		case DW_AT_accessibility:
1388 			fcn.is_method = true;
1389 			fcn.access = (ut8)val->uconstant;
1390 			break;
1391 		case DW_AT_external:
1392 			fcn.is_external = true;
1393 			break;
1394 		case DW_AT_trampoline:
1395 			fcn.is_trampoline = true;
1396 			break;
1397 		case DW_AT_ranges:
1398 		case DW_AT_high_pc:
1399 		default:
1400 			break;
1401 		}
1402 	}
1403 	if (!fcn.name || !fcn.addr) { /* we need a name, faddr */
1404 		goto cleanup;
1405 	}
1406 	RStrBuf args;
1407 	r_strbuf_init (&args);
1408 	/* TODO do the same for arguments in future so we can use their location */
1409 	RList/*<Variable*>*/  *variables = r_list_new ();
1410 	parse_function_args_and_vars (ctx, idx, &args, variables);
1411 
1412 	if (ret_type.len == 0) { /* DW_AT_type is omitted in case of `void` ret type */
1413 		r_strbuf_append (&ret_type, "void");
1414 	}
1415 	r_warn_if_fail (ctx->lang);
1416 	char *new_name = ctx->anal->binb.demangle (NULL, ctx->lang, fcn.name, fcn.addr, false);
1417 	fcn.name = new_name ? new_name : strdup (fcn.name);
1418 	fcn.signature = r_str_newf ("%s %s(%s);", r_strbuf_get (&ret_type), fcn.name, r_strbuf_get (&args));
1419 	sdb_save_dwarf_function (&fcn, variables, ctx->sdb);
1420 
1421 	free ((char *)fcn.signature);
1422 	free ((char *)fcn.name);
1423 
1424 	RListIter *iter;
1425 	Variable *var;
1426 	r_list_foreach (variables, iter, var) {
1427 		variable_free (var);
1428 	}
1429 	r_list_free (variables);
1430 	r_strbuf_fini (&args);
1431 cleanup:
1432 	r_strbuf_fini (&ret_type);
1433 }
1434 
1435 /**
1436  * @brief Get's language from comp unit for demangling
1437  *
1438  * @param die
1439  * @return char* string literal language represantation for demangling BinDemangle
1440  */
parse_comp_unit_lang(const RBinDwarfDie * die)1441 static char *parse_comp_unit_lang(const RBinDwarfDie *die) {
1442 	r_return_val_if_fail (die, NULL);
1443 
1444 	int idx = find_attr_idx (die, DW_AT_language);
1445 	char *lang = "cxx"; // default fallback
1446 	if (idx == -1) {
1447 		/* What to do now, it should have  one?, just assume C++ */
1448 		return lang;
1449 	}
1450 	const RBinDwarfAttrValue *val = &die->attr_values[idx];
1451 	r_warn_if_fail (val->kind == DW_AT_KIND_CONSTANT);
1452 
1453 	switch (val->uconstant)
1454 	{
1455 	case DW_LANG_Java:
1456 		return "java";
1457 	case DW_LANG_ObjC:
1458 	/* subideal, TODO research if dwarf gives me enough info to properly separate C++ and ObjC mangling */
1459 	case DW_LANG_ObjC_plus_plus:
1460 		return "objc";
1461 	case DW_LANG_D:
1462 		return "dlang";
1463 	case DW_LANG_Rust:
1464 		return "rust";
1465 	case DW_LANG_C_plus_plus:
1466 	case DW_LANG_C_plus_plus_14:
1467 	/* no demangling available */
1468 	case DW_LANG_Ada83:
1469 	case DW_LANG_Cobol74:
1470 	case DW_LANG_Cobol85:
1471 	case DW_LANG_Fortran77:
1472 	case DW_LANG_Fortran90:
1473 	case DW_LANG_Pascal83:
1474 	case DW_LANG_Modula2:
1475 	case DW_LANG_Ada95:
1476 	case DW_LANG_Fortran95:
1477 	case DW_LANG_PLI:
1478 	case DW_LANG_Python:
1479 	case DW_LANG_Swift:
1480 	case DW_LANG_Julia:
1481 	case DW_LANG_Dylan:
1482 	case DW_LANG_Fortran03:
1483 	case DW_LANG_Fortran08:
1484 	case DW_LANG_UPC:
1485 	case DW_LANG_C:
1486 	case DW_LANG_C89:
1487 	case DW_LANG_C99:
1488 	case DW_LANG_C11:
1489 	default:
1490 		return lang;
1491 	}
1492 	return lang;
1493 }
1494 
1495 /**
1496  * @brief Delegates DIE to it's proper parsing method
1497  *
1498  * @param ctx
1499  * @param idx index of the current entry
1500  */
parse_type_entry(Context * ctx,ut64 idx)1501 static void parse_type_entry(Context *ctx, ut64 idx) {
1502 	r_return_if_fail (ctx);
1503 
1504 	const RBinDwarfDie *die = &ctx->all_dies[idx];
1505 	switch (die->tag) {
1506 	case DW_TAG_structure_type:
1507 	case DW_TAG_union_type:
1508 	case DW_TAG_class_type:
1509 		parse_structure_type (ctx, idx);
1510 		break;
1511 	case DW_TAG_enumeration_type:
1512 		parse_enum_type (ctx, idx);
1513 		break;
1514 	case DW_TAG_typedef:
1515 		parse_typedef (ctx, idx);
1516 		break;
1517 	case DW_TAG_base_type:
1518 		parse_atomic_type (ctx, idx);
1519 		break;
1520 	case DW_TAG_subprogram:
1521 		parse_function (ctx, idx);
1522 		break;
1523 	case DW_TAG_compile_unit:
1524 		/* used for name demangling */
1525 		ctx->lang = parse_comp_unit_lang (die);
1526 	default:
1527 		break;
1528 	}
1529 }
1530 
1531 /**
1532  * @brief Parses type and function information out of DWARF entries
1533  *        and stores them to the sdb for further use
1534  *
1535  * @param anal
1536  * @param ctx
1537  */
r_anal_dwarf_process_info(const RAnal * anal,RAnalDwarfContext * ctx)1538 R_API void r_anal_dwarf_process_info(const RAnal *anal, RAnalDwarfContext *ctx) {
1539 	r_return_if_fail (ctx && anal);
1540 	Sdb *dwarf_sdb =  sdb_ns (anal->sdb, "dwarf", 1);
1541 	size_t i, j;
1542 	const RBinDwarfDebugInfo *info = ctx->info;
1543 	for (i = 0; i < info->count; i++) {
1544 		RBinDwarfCompUnit *unit = &info->comp_units[i];
1545 		Context dw_context = { // context per unit?
1546 			.anal = anal,
1547 			.all_dies = unit->dies,
1548 			.count = unit->count,
1549 			.die_map = info->lookup_table,
1550 			.sdb = dwarf_sdb,
1551 			.locations = ctx->loc,
1552 			.lang = NULL
1553 		};
1554 		for (j = 0; j < unit->count; j++) {
1555 			parse_type_entry (&dw_context, j);
1556 		}
1557 	}
1558 }
1559 
/* Sdb list filter: accepts only the entries whose value is exactly "fcn",
   the key and the user pointer are not looked at */
bool filter_sdb_function_names(void *user, const char *k, const char *v) {
	(void) user;
	(void) k;
	return strcmp (v, "fcn") == 0;
}
1565 
1566 /**
1567  * @brief Use parsed DWARF function info from Sdb in the anal functions
1568  *  XXX right now we only save parsed name and variables, we can't use signature now
1569  *  XXX refactor to be more readable
1570  * @param anal
1571  * @param dwarf_sdb
1572  */
r_anal_dwarf_integrate_functions(RAnal * anal,RFlag * flags,Sdb * dwarf_sdb)1573 R_API void r_anal_dwarf_integrate_functions(RAnal *anal, RFlag *flags, Sdb *dwarf_sdb) {
1574 	r_return_if_fail (anal && dwarf_sdb);
1575 
1576 	/* get all entries with value == func */
1577 	SdbList *sdb_list = sdb_foreach_list_filter (dwarf_sdb, filter_sdb_function_names, false);
1578 	SdbListIter *it;
1579 	SdbKv *kv;
1580 	/* iterate all function entries */
1581 	ls_foreach (sdb_list, it, kv) {
1582 		char *func_sname = kv->base.key;
1583 
1584 		char *addr_key = r_str_newf ("fcn.%s.addr", func_sname);
1585 		ut64 faddr = sdb_num_get (dwarf_sdb, addr_key, 0);
1586 		free (addr_key);
1587 
1588 		/* if the function is analyzed so we can edit */
1589 		RAnalFunction *fcn = r_anal_get_function_at (anal, faddr);
1590 		if (fcn) {
1591 			/* prepend dwarf debug info stuff with dbg. */
1592 			char *real_name_key = r_str_newf ("fcn.%s.name", func_sname);
1593 			char *real_name = sdb_get (dwarf_sdb, real_name_key, 0);
1594 			free (real_name_key);
1595 
1596 			char *dwf_name = r_str_newf ("dbg.%s", real_name);
1597 			free (real_name);
1598 
1599 			r_anal_function_rename (fcn, dwf_name);
1600 			free (dwf_name);
1601 
1602 			char *tmp = r_str_newf ("fcn.%s.sig", func_sname);
1603 			char *fcnstr = sdb_get (dwarf_sdb, tmp, 0);
1604 			free (tmp);
1605 			/* Apply signature as a comment at a function address */
1606 			r_meta_set_string (anal, R_META_TYPE_COMMENT, faddr, fcnstr);
1607 			free (fcnstr);
1608 		}
1609 		char *var_names_key = r_str_newf ("fcn.%s.vars", func_sname);
1610 		char *vars = sdb_get (dwarf_sdb, var_names_key, NULL);
1611 		char *var_name;
1612 		sdb_aforeach (var_name, vars) {
1613 			char *var_key = r_str_newf ("fcn.%s.var.%s", func_sname, var_name);
1614 			char *var_data = sdb_get (dwarf_sdb, var_key, NULL);
1615 			if (!var_data) {
1616 				goto loop_end;
1617 			}
1618 			char *extra = NULL;
1619 			char *kind = sdb_anext (var_data, &extra);
1620 			char *type = NULL;
1621 			extra = sdb_anext (extra, &type);
1622 			st64 offset = 0;
1623 			if (*kind != 'r') {
1624 				offset = strtol (extra, NULL, 10);
1625 			}
1626 			if (*kind == 'g') { /* global, fixed addr TODO add size to variables? */
1627 				char *global_name = r_str_newf ("global_%s", var_name);
1628 				r_flag_unset_off (flags, offset);
1629 				r_flag_set_next (flags, global_name, offset, 4);
1630 				free (global_name);
1631 			} else if (*kind == 's' && fcn) {
1632 				r_anal_function_set_var (fcn, offset - fcn->maxstack, *kind, type, 4, false, var_name);
1633 			} else if (*kind == 'r' && fcn) {
1634 				RRegItem *i = r_reg_get (anal->reg, extra, -1);
1635 				if (!i) {
1636 					goto loop_end;
1637 				}
1638 				r_anal_function_set_var (fcn, i->index, *kind, type, 4, false, var_name);
1639 			} else if (fcn) { /* kind == 'b' */
1640 				r_anal_function_set_var (fcn, offset - fcn->bp_off, *kind, type, 4, false, var_name);
1641 			}
1642 			free (var_key);
1643 			free (var_data);
1644 		loop_end:
1645 			sdb_aforeach_next (var_name);
1646 		}
1647 		free (var_names_key);
1648 		free (vars);
1649 	}
1650 	ls_free (sdb_list);
1651 }
1652