1 /* radare - LGPL - Copyright 2012-2020 - houndthe */
2
3 #include "base_types.h"
4 #include <sdb.h>
5 #include <r_anal.h>
6 #include <r_bin_dwarf.h>
7 #include <string.h>
8
/* State shared by the parse_* helpers of this file while DWARF info is processed */
typedef struct dwarf_parse_context_t {
	const RAnal *anal; // parsed base types are saved through it, cpu/bits pick the register mapping
	const RBinDwarfDie *all_dies; // flat DIE array, the idx arguments of the parse_* helpers index into it
	const ut64 count; // number of entries in all_dies
	Sdb *sdb;
	HtUP/*<ut64 offset, DwarfDie *die>*/ *die_map; // used to resolve references between DIEs
	HtUP/*<offset, RBinDwarfLocList*>*/ *locations; // used to resolve location list offsets
	char *lang; // for demangling, also decides whether linkage names are preferred
} Context;
18
/* Properties gathered for one function
   NOTE(review): presumably filled from a subprogram DIE by code outside this chunk - confirm */
typedef struct dwarf_function_t {
	ut64 addr;
	const char *name;
	const char *signature;
	bool is_external;
	bool is_method;
	bool is_virtual;
	bool is_trampoline; // intermediary in making call to another func
	ut8 access; // public = 1, protected = 2, private = 3, if not set assume private
	ut64 vtable_addr; // location description
	ut64 call_conv; // normal || program || nocall
} Function;
31
/* Kind of storage a variable location refers to, values count up from 0 */
typedef enum dwarf_location_kind {
	LOCATION_UNKNOWN = 0, // nothing usable, also set for unsupported registers
	LOCATION_GLOBAL, // 1
	LOCATION_BP, // 2, the register is the base pointer of the architecture
	LOCATION_SP, // 3, the register is the stack pointer of the architecture
	LOCATION_REGISTER, // 4, any other register
} VariableLocationKind;
/* Decoded location of a variable */
typedef struct dwarf_var_location_t {
	VariableLocationKind kind;
	ut64 address; // NOTE(review): presumably only meaningful for LOCATION_GLOBAL - confirm
	ut64 reg_num; // DWARF register number
	st64 offset; // signed offset, a frame base relative offset gets added on top of it
	const char *reg_name; /* string literal */
} VariableLocation;
46
/* A parsed variable, all three members are released by variable_free () */
typedef struct dwarf_variable_t {
	VariableLocation *location;
	char *name;
	char *type;
} Variable;
52
/**
 * @brief Releases a Variable together with the name, type and location it owns
 *
 * @param var variable to release, NULL is accepted and ignored
 */
static void variable_free(Variable *var) {
	if (!var) {
		return;
	}
	free (var->name);
	free (var->location);
	free (var->type);
	free (var);
}
59
60 /* return -1 if attr isn't found */
find_attr_idx(const RBinDwarfDie * die,st32 attr_name)61 static inline st32 find_attr_idx(const RBinDwarfDie *die, st32 attr_name) {
62 st32 i;
63 r_return_val_if_fail (die, -1);
64 for (i = 0; i < die->count; i++) {
65 if (die->attr_values[i].attr_name == attr_name) {
66 return i;
67 }
68 }
69 return -1;
70 }
71
72 /* return NULL if attr isn't found */
find_attr(const RBinDwarfDie * die,st32 attr_name)73 static RBinDwarfAttrValue *find_attr(const RBinDwarfDie *die, st32 attr_name) {
74 st32 i;
75 r_return_val_if_fail (die, NULL);
76 for (i = 0; i < die->count; i++) {
77 if (die->attr_values[i].attr_name == attr_name) {
78 return &die->attr_values[i];
79 }
80 }
81 return NULL;
82 }
83
84 /**
85 * @brief Pasted from r_strbuf_*
86 * Prepends string before a last occurence of character c
87 * Used to replicate proper C declaration for function pointers
88 * @param sb
89 * @param s
90 * @param c
91 */
strbuf_rev_prepend_char(RStrBuf * sb,const char * s,int c)92 static bool strbuf_rev_prepend_char(RStrBuf *sb, const char *s, int c) {
93 r_return_val_if_fail (sb && s, false);
94 size_t l = strlen (s);
95 // fast path if no chars to append
96 if (l == 0) {
97 return true;
98 }
99 size_t newlen = l + sb->len;
100 char *ns = malloc (newlen + 1);
101 bool ret = false;
102 char *sb_str = sb->ptr ? sb->ptr : sb->buf;
103 char *pivot = strrchr (sb_str, c);
104 if (!pivot) {
105 free (ns);
106 return false;
107 }
108 size_t idx = pivot - sb_str;
109 if (ns) {
110 memcpy (ns, sb_str, idx);
111 memcpy (ns + idx, s, l);
112 memcpy (ns + idx + l, sb_str + idx, sb->len - idx);
113 ns[newlen] = 0;
114 ret = r_strbuf_set (sb, ns) != NULL;
115 free (ns);
116 }
117 return ret;
118 }
119 /**
120 * @brief Pasted from r_strbuf_*
121 * Appends string after a first occurence of character c
122 * Used to replicate proper C declaration for function pointers
123 * @param sb
124 * @param s
125 * @param needle
126 */
strbuf_rev_append_char(RStrBuf * sb,const char * s,const char * needle)127 static bool strbuf_rev_append_char(RStrBuf *sb, const char *s, const char *needle) {
128 r_return_val_if_fail (sb && s, false);
129 size_t l = strlen (s);
130 // fast path if no chars to append
131 if (l == 0) {
132 return true;
133 }
134 bool ret = false;
135 char *sb_str = sb->ptr ? sb->ptr : sb->buf;
136 char *pivot = strstr (sb_str, needle);
137 if (!pivot) {
138 return false;
139 }
140 pivot += strlen (needle);
141 size_t idx = pivot - sb_str;
142 size_t newlen = l + sb->len;
143 char *ns = malloc (newlen + 1);
144 if (ns) {
145 memcpy (ns, sb_str, idx);
146 memcpy (ns + idx, s, l);
147 memcpy (ns + idx + l, sb_str + idx, sb->len - idx);
148 ns[newlen] = 0;
149 ret = r_strbuf_set (sb, ns) != NULL;
150 free (ns);
151 }
152 return ret;
153 }
154
/* Makes up a name of the form "type_0x<hex offset>" for an entry without a usable name,
   the result is a new string that callers release with free () */
static inline char *create_type_name_from_offset(ut64 offset) {
	char *generated = r_str_newf ("type_0x%" PFMT64x, offset);
	return generated;
}
158
159 /**
160 * @brief Get the DIE name or create unique one from it's offset
161 *
162 * @param die
163 * @return char* DIEs name or NULL if error
164 */
get_die_name(const RBinDwarfDie * die)165 static char *get_die_name(const RBinDwarfDie *die) {
166 char *name = NULL;
167 st32 name_attr_idx = find_attr_idx (die, DW_AT_name);
168
169 if (name_attr_idx != -1 && die->attr_values[name_attr_idx].string.content) {
170 name = strdup (die->attr_values[name_attr_idx].string.content);
171 } else {
172 name = create_type_name_from_offset (die->offset);
173 }
174 return name;
175 }
176
177 /**
178 * @brief Get the DIE size in bits
179 *
180 * @param die
181 * @return ut64 size in bits or 0 if not found
182 */
get_die_size(const RBinDwarfDie * die)183 static ut64 get_die_size(const RBinDwarfDie *die) {
184 ut64 size = 0;
185 st32 byte_size_idx = find_attr_idx (die, DW_AT_byte_size);
186
187 if (byte_size_idx != -1) {
188 size = die->attr_values[byte_size_idx].uconstant * CHAR_BIT;
189 } else {
190 st32 bit_size_idx = find_attr_idx (die, DW_AT_bit_size);
191
192 if (bit_size_idx != -1) {
193 size = die->attr_values[bit_size_idx].uconstant;
194 }
195 }
196 return size;
197 }
198
199 /**
200 * @brief Parses array type entry signature into strbuf
201 *
202 * @param ctx
203 * @param idx index of the current entry
204 * @param strbuf strbuf to store the type into
205 * @return st32 -1 if error else 0
206 */
parse_array_type(Context * ctx,ut64 idx,RStrBuf * strbuf)207 static st32 parse_array_type(Context *ctx, ut64 idx, RStrBuf *strbuf) {
208 const RBinDwarfDie *die = &ctx->all_dies[idx++];
209
210 if (die->has_children) {
211 int child_depth = 1;
212 size_t j;
213 for (j = idx; child_depth > 0 && j < ctx->count; j++) {
214 const RBinDwarfDie *child_die = &ctx->all_dies[j];
215 // right now we skip non direct descendats of the structure
216 // can be also DW_TAG_suprogram for class methods or tag for templates
217 if (child_depth == 1 && child_die->tag == DW_TAG_subrange_type) {
218 size_t i;
219 for (i = 0; i < child_die->count; i++) {
220 const RBinDwarfAttrValue *value = &child_die->attr_values[i];
221 switch (value->attr_name) {
222 case DW_AT_upper_bound:
223 case DW_AT_count:
224 r_strbuf_appendf (strbuf, "[%" PFMT64d "]", value->uconstant + 1);
225 break;
226
227 default:
228 break;
229 }
230 }
231 }
232 if (child_die->has_children) {
233 child_depth++;
234 }
235 // sibling list is terminated by null entry
236 if (child_die->abbrev_code == 0) {
237 child_depth--;
238 }
239 }
240 }
241 return 0;
242 }
243
244 /**
245 * @brief Recursively parses type entry of a certain offset into strbuf
246 * saves type size into *size
247 *
248 * @param ctx
249 * @param offset offset of the type entry
250 * @param strbuf string to store the type into
251 * @param size ptr to size of a type to fill up (can be NULL if unwanted)
252 * @return st32 -1 if error else DW_TAG of the entry
253 *
254 * TODO make cache for type entries, one type is usually referenced
255 * multiple times which means it's parsed multiple times instead of once
256 */
parse_type(Context * ctx,const ut64 offset,RStrBuf * strbuf,ut64 * size)257 static st32 parse_type (Context *ctx, const ut64 offset, RStrBuf *strbuf, ut64 *size) {
258 r_return_val_if_fail (strbuf, -1);
259 RBinDwarfDie *die = ht_up_find (ctx->die_map, offset, NULL);
260 if (!die) {
261 return -1;
262 }
263
264 st32 type_idx;
265 st32 tag;
266 char *name = NULL;
267 // get size of first type DIE that has size
268 if (size && *size == 0) {
269 *size = get_die_size (die);
270 }
271 switch (die->tag) {
272 // this should be recursive search for the type until you find base/user defined type
273 case DW_TAG_pointer_type:
274 type_idx = find_attr_idx (die, DW_AT_type);
275 if (type_idx == -1) {
276 r_strbuf_append (strbuf, "void");
277 r_strbuf_append (strbuf, " *");
278 } else {
279 tag = parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
280 if (tag == DW_TAG_subroutine_type) {
281 strbuf_rev_prepend_char (strbuf, "(*)", '(');
282 } else if (tag == DW_TAG_pointer_type) {
283 if (!strbuf_rev_append_char (strbuf, "*", "(*")) {
284 strbuf_rev_prepend_char (strbuf, "*", '*');
285 }
286 } else {
287 r_strbuf_append (strbuf, " *");
288 }
289 }
290 break;
291 // We won't parse them as a complete type, because that will already be done
292 // so just a name now
293 case DW_TAG_typedef:
294 case DW_TAG_base_type:
295 case DW_TAG_structure_type:
296 case DW_TAG_enumeration_type:
297 case DW_TAG_union_type:
298 case DW_TAG_class_type:
299 name = get_die_name (die);
300 r_strbuf_append (strbuf, name);
301 free (name);
302 break;
303 case DW_TAG_subroutine_type:
304 type_idx = find_attr_idx (die, DW_AT_type);
305 if (type_idx == -1) {
306 r_strbuf_append (strbuf, "void");
307 } else {
308 parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
309 }
310 r_strbuf_append (strbuf, " (");
311 if (die->has_children) { // has parameters
312 }
313 r_strbuf_append (strbuf, ")");
314 break;
315 case DW_TAG_array_type:
316 type_idx = find_attr_idx (die, DW_AT_type);
317 if (type_idx != -1) {
318 parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
319 }
320 parse_array_type (ctx, die - ctx->all_dies, strbuf);
321 break;
322 case DW_TAG_const_type:
323 type_idx = find_attr_idx (die, DW_AT_type);
324 if (type_idx != -1) {
325 parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
326 }
327 r_strbuf_append (strbuf, " const");
328 break;
329 case DW_TAG_volatile_type:
330 type_idx = find_attr_idx (die, DW_AT_type);
331 if (type_idx != -1) {
332 parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
333 }
334 r_strbuf_append (strbuf, " volatile");
335 break;
336 case DW_TAG_restrict_type:
337 type_idx = find_attr_idx (die, DW_AT_type);
338 if (type_idx != -1) {
339 parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
340 }
341 r_strbuf_append (strbuf, " restrict");
342 break;
343 case DW_TAG_rvalue_reference_type:
344 type_idx = find_attr_idx (die, DW_AT_type);
345 if (type_idx != -1) {
346 parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
347 }
348 r_strbuf_append (strbuf, " &&");
349 break;
350 case DW_TAG_reference_type:
351 type_idx = find_attr_idx (die, DW_AT_type);
352 if (type_idx != -1) {
353 parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size);
354 }
355 r_strbuf_append (strbuf, " &");
356 break;
357 default:
358 break;
359 }
360 return (st32)die->tag;
361 }
362
363 /**
364 * @brief Parses structured entry into *result RAnalStructMember
365 * http://www.dwarfstd.org/doc/DWARF4.pdf#page=102&zoom=100,0,0
366 *
367 * @param ctx
368 * @param idx index of the current entry
369 * @param result ptr to result member to fill up
370 * @return RAnalStructMember* ptr to parsed Member
371 */
parse_struct_member(Context * ctx,ut64 idx,RAnalStructMember * result)372 static RAnalStructMember *parse_struct_member (Context *ctx, ut64 idx, RAnalStructMember *result) {
373 r_return_val_if_fail (result, NULL);
374 const RBinDwarfDie *die = &ctx->all_dies[idx];
375
376 char *name = NULL;
377 char *type = NULL;
378 ut64 offset = 0;
379 ut64 size = 0;
380 RStrBuf strbuf;
381 r_strbuf_init (&strbuf);
382 size_t i;
383 for (i = 0; i < die->count; i++) {
384 RBinDwarfAttrValue *value = &die->attr_values[i];
385 switch (die->attr_values[i].attr_name) {
386 case DW_AT_name:
387 name = get_die_name (die);
388 if (!name) {
389 goto cleanup;
390 }
391 break;
392 case DW_AT_type:
393 parse_type (ctx, value->reference, &strbuf, &size);
394 type = r_strbuf_drain_nofree (&strbuf);
395 if (!type || !*type) {
396 goto cleanup;
397 }
398 break;
399 case DW_AT_data_member_location:
400 /*
401 2 cases, 1.: If val is integer, it offset in bytes from
402 the beginning of containing entity. If containing entity has
403 a bit offset, member has that bit offset aswell
404 2.: value is a location description
405 http://www.dwarfstd.org/doc/DWARF4.pdf#page=39&zoom=100,0,0
406 */
407 offset = value->uconstant;
408 break;
409 case DW_AT_accessibility: // private, public etc.
410 case DW_AT_mutable: // flag is it is mutable
411 case DW_AT_data_bit_offset:
412 /*
413 int that specifies the number of bits from beginning
414 of containing entity to the beginning of the data member
415 */
416 break;
417 // If the size of a data member is not the same as the
418 // size of the type given for the data member
419 case DW_AT_byte_size:
420 size = value->uconstant * CHAR_BIT;
421 break;
422 case DW_AT_bit_size:
423 size = value->uconstant;
424 break;
425 case DW_AT_containing_type:
426 default:
427 break;
428 }
429 }
430
431 result->name = name;
432 result->type = type;
433 result->offset = offset;
434 result->size = size;
435 return result;
436 cleanup:
437 free (name);
438 free (type);
439 return NULL;
440 }
441
442 /**
443 * @brief Parses enum entry into *result RAnalEnumCase
444 * http://www.dwarfstd.org/doc/DWARF4.pdf#page=110&zoom=100,0,0
445 *
446 * @param ctx
447 * @param idx index of the current entry
448 * @param result ptr to result case to fill up
449 * @return RAnalEnumCase* Ptr to parsed enum case
450 */
parse_enumerator(Context * ctx,ut64 idx,RAnalEnumCase * result)451 static RAnalEnumCase *parse_enumerator(Context *ctx, ut64 idx, RAnalEnumCase *result) {
452 const RBinDwarfDie *die = &ctx->all_dies[idx];
453
454 char *name = NULL;
455 int val = 0;
456 size_t i;
457
458 // Enumerator has DW_AT_name and DW_AT_const_value
459 for (i = 0; i < die->count; i++) {
460 RBinDwarfAttrValue *value = &die->attr_values[i];
461 switch (die->attr_values[i].attr_name) {
462 case DW_AT_name:
463 name = get_die_name (die);
464 if (!name) {
465 goto cleanup;
466 }
467 break;
468 case DW_AT_const_value:
469 // ?? can be block, sdata, data, string w/e
470 val = value->uconstant; // TODO solve the encoding, I don't know in which union member is it store
471 break;
472 default:
473 break;
474 }
475 }
476
477 result->name = name;
478 result->val = (int)val;
479 return result;
480 cleanup:
481 free (name);
482 return NULL;
483 }
484
485 /**
486 * @brief Parses a structured entry (structs, classes, unions) into
487 * RAnalBaseType and saves it using r_anal_save_base_type ()
488 *
489 * @param ctx
490 * @param idx index of the current entry
491 */
492 // http://www.dwarfstd.org/doc/DWARF4.pdf#page=102&zoom=100,0,0
parse_structure_type(Context * ctx,ut64 idx)493 static void parse_structure_type(Context *ctx, ut64 idx) {
494 const RBinDwarfDie *die = &ctx->all_dies[idx];
495
496 RAnalBaseTypeKind kind;
497 if (die->tag == DW_TAG_union_type) {
498 kind = R_ANAL_BASE_TYPE_KIND_UNION;
499 } else {
500 kind = R_ANAL_BASE_TYPE_KIND_STRUCT;
501 }
502
503 RAnalBaseType *base_type = r_anal_base_type_new (kind);
504 if (!base_type) {
505 return;
506 }
507
508 base_type->name = get_die_name (die);
509 if (!base_type->name) {
510 goto cleanup;
511 }
512
513 // if it is definition of previous declaration (TODO Fix, big ugly hotfix addition)
514 st32 spec_attr_idx = find_attr_idx (die, DW_AT_specification);
515 if (spec_attr_idx != -1) {
516 RBinDwarfDie *decl_die = ht_up_find (ctx->die_map, die->attr_values[spec_attr_idx].reference, NULL);
517 if (!decl_die) {
518 goto cleanup;
519 }
520 st32 name_attr_idx = find_attr_idx (decl_die, DW_AT_name);
521 if (name_attr_idx != -1) {
522 free (base_type->name);
523 base_type->name = get_die_name (decl_die);
524 }
525 }
526
527 base_type->size = get_die_size (die);
528
529 RAnalStructMember member = { 0 };
530 // Parse out all members, can this in someway be extracted to a function?
531 if (die->has_children) {
532 int child_depth = 1; // Direct children of the node
533 size_t j;
534 idx++; // Move to the first children node
535 for (j = idx; child_depth > 0 && j < ctx->count; j++) {
536 const RBinDwarfDie *child_die = &ctx->all_dies[j];
537 // we take only direct descendats of the structure
538 // can be also DW_TAG_suprogram for class methods or tag for templates
539 if (child_depth == 1 && child_die->tag == DW_TAG_member) {
540 RAnalStructMember *result = parse_struct_member (ctx, j, &member);
541 if (!result) {
542 goto cleanup;
543 } else {
544 void *element = r_vector_push (&base_type->struct_data.members, &member);
545 if (!element) {
546 goto cleanup;
547 }
548 }
549 }
550 if (child_die->has_children) {
551 child_depth++;
552 }
553 if (child_die->abbrev_code == 0) { // siblings terminator
554 child_depth--;
555 }
556 }
557 }
558 r_anal_save_base_type (ctx->anal, base_type);
559 cleanup:
560 r_anal_base_type_free (base_type);
561 }
562
563 /**
564 * @brief Parses a enum entry into RAnalBaseType and saves it
565 * int Sdb using r_anal_save_base_type ()
566 *
567 * @param ctx
568 * @param idx index of the current entry
569 */
parse_enum_type(Context * ctx,ut64 idx)570 static void parse_enum_type(Context *ctx, ut64 idx) {
571 const RBinDwarfDie *die = &ctx->all_dies[idx];
572
573 RAnalBaseType *base_type = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_ENUM);
574 if (!base_type) {
575 return;
576 }
577
578 base_type->name = get_die_name (die);
579 if (!base_type->name) {
580 goto cleanup;
581 }
582 base_type->size = get_die_size (die);
583
584 st32 type_attr_idx = find_attr_idx (die, DW_AT_type);
585 if (type_attr_idx != -1) {
586 RStrBuf strbuf;
587 r_strbuf_init (&strbuf);
588 parse_type (ctx, die->attr_values[type_attr_idx].reference, &strbuf, &base_type->size);
589 base_type->type = r_strbuf_drain_nofree (&strbuf);
590 }
591
592 RAnalEnumCase cas;
593 if (die->has_children) {
594 int child_depth = 1; // Direct children of the node
595 size_t j;
596 idx++; // Move to the first children node
597 for (j = idx; child_depth > 0 && j < ctx->count; j++) {
598 const RBinDwarfDie *child_die = &ctx->all_dies[j];
599 // we take only direct descendats of the structure
600 if (child_depth == 1 && child_die->tag == DW_TAG_enumerator) {
601 RAnalEnumCase *result = parse_enumerator (ctx, j, &cas);
602 if (!result) {
603 goto cleanup;
604 } else {
605 void *element = r_vector_push (&base_type->enum_data.cases, &cas);
606 if (!element) {
607 enum_type_case_free (result, NULL);
608 goto cleanup;
609 }
610 }
611 }
612 if (child_die->has_children) {
613 child_depth++;
614 }
615 // sibling list is terminated by null entry
616 if (child_die->abbrev_code == 0) {
617 child_depth--;
618 }
619 }
620 }
621 r_anal_save_base_type (ctx->anal, base_type);
622 cleanup:
623 r_anal_base_type_free (base_type);
624 }
625
626 /**
627 * @brief Parses a typedef entry into RAnalBaseType and saves it
628 * using r_anal_save_base_type ()
629 *
630 * http://www.dwarfstd.org/doc/DWARF4.pdf#page=96&zoom=100,0,0
631 *
632 * @param ctx
633 * @param idx index of the current entry
634 */
parse_typedef(Context * ctx,ut64 idx)635 static void parse_typedef(Context *ctx, ut64 idx) {
636 const RBinDwarfDie *die = &ctx->all_dies[idx];
637
638 char *name = NULL;
639 char *type = NULL;
640 ut64 size = 0;
641 RStrBuf strbuf;
642 r_strbuf_init (&strbuf);
643 size_t i;
644
645 for (i = 0; i < die->count; i++) {
646 RBinDwarfAttrValue *value = &die->attr_values[i];
647 switch (die->attr_values[i].attr_name) {
648 case DW_AT_name:
649 name = get_die_name (die);
650 if (!name) {
651 goto cleanup;
652 }
653 break;
654 case DW_AT_type:
655 parse_type (ctx, value->reference, &strbuf, &size);
656 type = r_strbuf_drain_nofree (&strbuf);
657 if (!type) {
658 goto cleanup;
659 }
660 break;
661 default:
662 break;
663 }
664 }
665 if (!name) { // type has to have a name for now
666 goto cleanup;
667 }
668 RAnalBaseType *base_type = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_TYPEDEF);
669 if (!base_type) {
670 goto cleanup;
671 }
672 base_type->name = name;
673 base_type->type = type;
674 r_anal_save_base_type (ctx->anal, base_type);
675 r_anal_base_type_free (base_type);
676 r_strbuf_fini (&strbuf);
677 return;
678 cleanup:
679 free (name);
680 free (type);
681 r_strbuf_fini (&strbuf);
682 }
683
/**
 * @brief Parses an entry with name and size into atomic RAnalBaseType
 *        and saves it using r_anal_save_base_type ()
 *        Entries without DW_AT_name are skipped
 *
 * @param ctx
 * @param idx index of the current entry
 */
static void parse_atomic_type(Context *ctx, ut64 idx) {
	const RBinDwarfDie *die = &ctx->all_dies[idx];

	char *name = NULL;
	ut64 size = 0;
	size_t i;
	// TODO support endiannity and encoding in future?
	for (i = 0; i < die->count; i++) {
		RBinDwarfAttrValue *value = &die->attr_values[i];
		switch (die->attr_values[i].attr_name) {
		case DW_AT_name:
			if (!value->string.content) {
				name = create_type_name_from_offset (die->offset);
			} else {
				name = strdup (value->string.content);
			}
			if (!name) {
				return;
			}
			break;
		case DW_AT_byte_size:
			size = value->uconstant * CHAR_BIT;
			break;
		case DW_AT_bit_size:
			size = value->uconstant;
			break;
		case DW_AT_encoding:
		default:
			break;
		}
	}
	if (!name) { // type has to have a name for now
		return;
	}
	RAnalBaseType *base_type = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_ATOMIC);
	if (!base_type) {
		// nobody took the name over, so it has to be released here
		free (name);
		return;
	}
	base_type->name = name;
	base_type->size = size;
	r_anal_save_base_type (ctx->anal, base_type);
	r_anal_base_type_free (base_type);
}
727
get_specification_die_name(const RBinDwarfDie * die)728 static const char *get_specification_die_name(const RBinDwarfDie *die) {
729 st32 linkage_name_attr_idx = find_attr_idx (die, DW_AT_linkage_name);
730 if (linkage_name_attr_idx != -1 && die->attr_values[linkage_name_attr_idx].string.content) {
731 return die->attr_values[linkage_name_attr_idx].string.content;
732 }
733 st32 name_attr_idx = find_attr_idx (die, DW_AT_name);
734 if (name_attr_idx != -1 && die->attr_values[name_attr_idx].string.content) {
735 return die->attr_values[name_attr_idx].string.content;
736 }
737 return NULL;
738 }
739
/**
 * @brief Appends the type referenced by DW_AT_type of a DIE to ret_type,
 *        ret_type is left untouched if the DIE has no DW_AT_type
 *
 * @param ctx
 * @param die
 * @param ret_type strbuf to append the type to
 */
static void get_spec_die_type(Context *ctx, RBinDwarfDie *die, RStrBuf *ret_type) {
	const st32 type_pos = find_attr_idx (die, DW_AT_type);
	if (type_pos == -1) {
		return;
	}
	ut64 unused_size = 0; // parse_type reports a size, nobody needs it here
	parse_type (ctx, die->attr_values[type_pos].reference, ret_type, &unused_size);
}
747
/* For some languages linkage name is more informative like C++,
   but for Rust it's rubbish and the normal name is fine.
   Returns false for "rust" and "ada", true for every other language.
   An unknown (NULL) language also gives false, presumably a linkage name
   can't be turned into something readable without knowing the language */
static bool prefer_linkage_name(char *lang) {
	if (!lang) {
		return false;
	}
	if (!strcmp (lang, "rust") || !strcmp (lang, "ada")) {
		return false;
	}
	return true;
}
758
/**
 * @brief Takes the name and the type from the DIE an abstract origin refers to
 *        Which of DW_AT_name and linkage name ends up in *name depends on
 *        prefer_linkage_name () only, not on the order of the attributes,
 *        the other one is just a fallback
 *
 * @param ctx
 * @param offset offset of the referenced DIE, nothing happens if it's unknown
 * @param type strbuf the DW_AT_type of the DIE is appended to
 * @param name receives a ptr to the string stored in the DIE (not a copy),
 *             untouched if the DIE has no name attribute
 */
static void parse_abstract_origin(Context *ctx, ut64 offset, RStrBuf *type, const char **name) {
	RBinDwarfDie *die = ht_up_find (ctx->die_map, offset, NULL);
	if (!die) {
		return;
	}
	size_t i;
	ut64 size = 0;
	bool has_name = false;
	bool has_linkage_name = false;
	const bool get_linkage_name = prefer_linkage_name (ctx->lang);
	for (i = 0; i < die->count; i++) {
		const RBinDwarfAttrValue *val = &die->attr_values[i];
		switch (val->attr_name) {
		case DW_AT_name:
			if (!get_linkage_name || !has_linkage_name) {
				*name = val->string.content;
			}
			has_name = true;
			break;
		case DW_AT_linkage_name:
		case DW_AT_MIPS_linkage_name:
			// when the plain name is preferred, the linkage name must not
			// replace it just because it comes later in the DIE
			if (get_linkage_name || !has_name) {
				*name = val->string.content;
			}
			has_linkage_name = true;
			break;
		case DW_AT_type:
			parse_type (ctx, val->reference, type, &size);
			break;
		default:
			break;
		}
	}
}
788
789 /* x86_64 https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf */
map_dwarf_reg_to_x86_64_reg(ut64 reg_num,VariableLocationKind * kind)790 static const char *map_dwarf_reg_to_x86_64_reg(ut64 reg_num, VariableLocationKind *kind) {
791 *kind = LOCATION_REGISTER;
792 switch (reg_num) {
793 case 0: return "rax";
794 case 1: return "rdx";
795 case 2: return "rcx";
796 case 3: return "rbx";
797 case 4: return "rsi";
798 case 5: return "rdi";
799 case 6:
800 *kind = LOCATION_BP;
801 return "rbp";
802 case 7:
803 *kind = LOCATION_SP;
804 return "rsp";
805 case 8: return "r8";
806 case 9: return "r9";
807 case 10: return "r10";
808 case 11: return "r11";
809 case 12: return "r12";
810 case 13: return "r13";
811 case 14: return "r14";
812 case 15: return "r15";
813 case 17: return "xmm0";
814 case 18: return "xmm1";
815 case 19: return "xmm2";
816 case 20: return "xmm3";
817 case 21: return "xmm4";
818 case 22: return "xmm5";
819 case 23: return "xmm6";
820 case 24: return "xmm7";
821 default:
822 *kind = LOCATION_UNKNOWN;
823 return "unsupported_reg";
824 }
825 }
826
827 /* x86 https://01.org/sites/default/files/file_attach/intel386-psabi-1.0.pdf */
map_dwarf_reg_to_x86_reg(ut64 reg_num,VariableLocationKind * kind)828 static const char *map_dwarf_reg_to_x86_reg(ut64 reg_num, VariableLocationKind *kind) {
829 *kind = LOCATION_REGISTER;
830 switch (reg_num) {
831 case 0: return "eax";
832 case 1: return "edx";
833 case 2: return "ecx";
834 case 3: return "ebx";
835 case 4:
836 *kind = LOCATION_SP;
837 return "esp";
838 case 5:
839 *kind = LOCATION_BP;
840 return "ebp";
841 case 6: return "esi";
842 case 7: return "edi";
843 case 21: return "xmm0";
844 case 22: return "xmm1";
845 case 23: return "xmm2";
846 case 24: return "xmm3";
847 case 25: return "xmm4";
848 case 26: return "xmm5";
849 case 27: return "xmm6";
850 case 28: return "xmm7";
851 default:
852 r_warn_if_reached ();
853 *kind = LOCATION_UNKNOWN;
854 return "unsupported_reg";
855 }
856 }
857
858 /* https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html#DW-REG */
map_dwarf_reg_to_ppc64_reg(ut64 reg_num,VariableLocationKind * kind)859 static const char *map_dwarf_reg_to_ppc64_reg(ut64 reg_num, VariableLocationKind *kind) {
860 *kind = LOCATION_REGISTER;
861 switch (reg_num) {
862 case 0: return "r0";
863 case 1:
864 *kind = LOCATION_SP;
865 return "r1";
866 case 2: return "r2";
867 case 3: return "r3";
868 case 4: return "r4";
869 case 5: return "r5";
870 case 6: return "r6";
871 case 7: return "r7";
872 case 8: return "r8";
873 case 9: return "r9";
874 case 10: return "r10";
875 case 11: return "r11";
876 case 12: return "r12";
877 case 13: return "r13";
878 case 14: return "r14";
879 case 15: return "r15";
880 case 16: return "r16";
881 case 17: return "r17";
882 case 18: return "r18";
883 case 19: return "r19";
884 case 20: return "r20";
885 case 21: return "r21";
886 case 22: return "r22";
887 case 23: return "r23";
888 case 24: return "r24";
889 case 25: return "r25";
890 case 26: return "r26";
891 case 27: return "r27";
892 case 28: return "r28";
893 case 29: return "r29";
894 case 30: return "r30";
895 case 31: return "r31";
896 default:
897 r_warn_if_reached ();
898 *kind = LOCATION_UNKNOWN;
899 return "unsupported_reg";
900 }
901 }
902
903 /* returns string literal register name!
904 TODO add more arches */
get_dwarf_reg_name(char * arch,int reg_num,VariableLocationKind * kind,int bits)905 static const char *get_dwarf_reg_name(char *arch, int reg_num, VariableLocationKind *kind, int bits) {
906 if (!strcmp (arch, "x86")) {
907 if (bits == 64) {
908 return map_dwarf_reg_to_x86_64_reg (reg_num, kind);
909 } else {
910 return map_dwarf_reg_to_x86_reg (reg_num, kind);
911 }
912 } else if (!strcmp (arch, "ppc")) {
913 if (bits == 64) {
914 return map_dwarf_reg_to_ppc64_reg (reg_num, kind);
915 }
916 }
917 *kind = LOCATION_UNKNOWN;
918 return "unsupported_reg";
919 }
920
find_largest_loc_range(RList * loc_list)921 static RBinDwarfLocRange *find_largest_loc_range (RList *loc_list) {
922 RBinDwarfLocRange *largest = NULL;
923 ut64 max_range_size = 0;
924 RListIter *iter;
925 RBinDwarfLocRange *range;
926 r_list_foreach (loc_list, iter, range) {
927 ut64 diff = range->end - range->start;
928 if (diff > max_range_size) {
929 max_range_size = diff ;
930 largest = range;
931 }
932 }
933 return largest;
934 }
935
936 /* TODO move a lot of the parsing here into dwarf.c and do only processing here */
parse_dwarf_location(Context * ctx,const RBinDwarfAttrValue * loc,const RBinDwarfAttrValue * frame_base)937 static VariableLocation *parse_dwarf_location (Context *ctx, const RBinDwarfAttrValue *loc, const RBinDwarfAttrValue *frame_base) {
938 /* reg5 - val is in register 5
939 fbreg <leb> - offset from frame base
940 regx <leb> - contents is in register X
941 addr <addr> - contents is in at addr
942 bregXX <leb> - contents is at offset from specified register
943 - we now support 3 options: SP, BP and register based arguments */
944
945 /* Loclist offset is usually CONSTANT or REFERENCE at older DWARF versions, new one has LocListPtr for that */
946 if (loc->kind != DW_AT_KIND_BLOCK && loc->kind != DW_AT_KIND_LOCLISTPTR && loc->kind != DW_AT_KIND_REFERENCE && loc->kind != DW_AT_KIND_CONSTANT) {
947 return NULL;
948 }
949 RBinDwarfBlock block;
950 if (loc->kind == DW_AT_KIND_LOCLISTPTR || loc->kind == DW_AT_KIND_REFERENCE || loc->kind == DW_AT_KIND_CONSTANT) {
951 ut64 offset = loc->reference;
952 RBinDwarfLocList *range_list = ht_up_find (ctx->locations, offset, NULL);
953 if (!range_list) { /* for some reason offset isn't there, wrong parsing or malformed dwarf */
954 return NULL;
955 }
956 /* use the largest range as a variable */
957 RBinDwarfLocRange *range = find_largest_loc_range (range_list->list);
958 if (!range) {
959 return NULL;
960 }
961 /* Very rough and sloppy, refactor this hacked up stuff */
962 block = *range->expression;
963 // range->expression... etc
964 } else {
965 block = loc->block;
966 }
967 VariableLocationKind kind = LOCATION_UNKNOWN;
968 st64 offset = 0;
969 ut64 address = 0;
970 ut64 reg_num = -1;
971 const char *reg_name = NULL; /* literal */
972 size_t i;
973 for (i = 0; i < block.length; i++) {
974 switch (block.data[i]) {
975 case DW_OP_fbreg: {
976 /* TODO sometimes CFA is referenced, but we don't parse that yet
977 just an offset involving framebase of a function*/
978 if (i == block.length - 1) {
979 return NULL;
980 }
981 const ut8 *dump = &block.data[++i];
982 offset = r_sleb128 (&dump, &block.data[loc->block.length]);
983 if (frame_base) {
984 /* recursive parsing, but frame_base should be only one, but someone
985 could make malicious resource exhaustion attack, so a depth counter might be cool? */
986 VariableLocation *location = parse_dwarf_location (ctx, frame_base, NULL);
987 if (location) {
988 location->offset += offset;
989 return location;
990 }
991 return NULL;
992 } else {
993 /* Might happen if frame_base has a frame_base reference? I don't think it can tho */
994 return NULL;
995 }
996 break;
997 }
998 case DW_OP_reg0:
999 case DW_OP_reg1:
1000 case DW_OP_reg2:
1001 case DW_OP_reg3:
1002 case DW_OP_reg4:
1003 case DW_OP_reg5:
1004 case DW_OP_reg6:
1005 case DW_OP_reg7:
1006 case DW_OP_reg8:
1007 case DW_OP_reg9:
1008 case DW_OP_reg10:
1009 case DW_OP_reg11:
1010 case DW_OP_reg12:
1011 case DW_OP_reg13:
1012 case DW_OP_reg14:
1013 case DW_OP_reg15:
1014 case DW_OP_reg16:
1015 case DW_OP_reg17:
1016 case DW_OP_reg18:
1017 case DW_OP_reg19:
1018 case DW_OP_reg20:
1019 case DW_OP_reg21:
1020 case DW_OP_reg22:
1021 case DW_OP_reg23:
1022 case DW_OP_reg24:
1023 case DW_OP_reg25:
1024 case DW_OP_reg26:
1025 case DW_OP_reg27:
1026 case DW_OP_reg28:
1027 case DW_OP_reg29:
1028 case DW_OP_reg30:
1029 case DW_OP_reg31: {
1030 /* Will mostly be used for SP based arguments */
1031 /* TODO I need to find binaries that uses this so I can test it out*/
1032 reg_num = block.data[i] - DW_OP_reg0; // get the reg number
1033 reg_name = get_dwarf_reg_name (ctx->anal->cpu, reg_num, &kind, ctx->anal->bits);
1034 break;
1035 }
1036 case DW_OP_breg0:
1037 case DW_OP_breg1:
1038 case DW_OP_breg2:
1039 case DW_OP_breg3:
1040 case DW_OP_breg4:
1041 case DW_OP_breg5:
1042 case DW_OP_breg6:
1043 case DW_OP_breg7:
1044 case DW_OP_breg8:
1045 case DW_OP_breg9:
1046 case DW_OP_breg10:
1047 case DW_OP_breg11:
1048 case DW_OP_breg12:
1049 case DW_OP_breg13:
1050 case DW_OP_breg14:
1051 case DW_OP_breg15:
1052 case DW_OP_breg16:
1053 case DW_OP_breg17:
1054 case DW_OP_breg18:
1055 case DW_OP_breg19:
1056 case DW_OP_breg20:
1057 case DW_OP_breg21:
1058 case DW_OP_breg22:
1059 case DW_OP_breg23:
1060 case DW_OP_breg24:
1061 case DW_OP_breg25:
1062 case DW_OP_breg26:
1063 case DW_OP_breg27:
1064 case DW_OP_breg28:
1065 case DW_OP_breg29:
1066 case DW_OP_breg30:
1067 case DW_OP_breg31: {
1068 if (i == block.length - 1) {
1069 return NULL;
1070 }
1071 /* The single operand of the DW_OP_bregn operations provides
1072 signed LEB128 offset from the specified register. */
1073 reg_num = block.data[i] - DW_OP_breg0; // get the reg number
1074 const ut8 *buffer = &block.data[++i];
1075 offset = r_sleb128 (&buffer, &block.data[block.length]);
1076 /* TODO do a proper expression parsing, move by the amount of bytes sleb reads */
1077 i += buffer - &block.data[0];
1078 reg_name = get_dwarf_reg_name (ctx->anal->cpu, reg_num, &kind, ctx->anal->bits);
1079 break;
1080 }
1081 case DW_OP_bregx: {
1082 if (i == block.length - 1) {
1083 return NULL;
1084 }
1085 /* 2 operands, reg_number, offset*/
1086 /* I need to find binaries that uses this so I can test it out*/
1087 const ut8 *buffer = &block.data[++i];
1088 const ut8 *buf_end = &block.data[block.length];
1089 buffer = r_uleb128 (buffer, buf_end - buffer, ®_num, NULL);
1090 if (buffer == buf_end) {
1091 return NULL;
1092 }
1093 offset = r_sleb128 (&buffer, buf_end);
1094 reg_name = get_dwarf_reg_name (ctx->anal->cpu, reg_num, &kind, ctx->anal->bits);
1095 break;
1096 }
1097 case DW_OP_addr: {
1098 /* The DW_OP_addr operation has a single operand that encodes a machine address and whose
1099 size is the size of an address on the target machine. */
1100 const int addr_size = ctx->anal->bits / 8;
1101 const ut8 *dump = &block.data[++i];
1102 /* malformed, not enough bytes to represent address */
1103 if (block.length - i < addr_size) {
1104 return NULL;
1105 }
1106 switch (addr_size) {
1107 case 1:
1108 address = r_read_ble8 (dump);
1109 break;
1110 case 2:
1111 address = r_read_ble16 (dump, ctx->anal->big_endian);
1112 break;
1113 case 4:
1114 address = r_read_ble32 (dump, ctx->anal->big_endian);
1115 break;
1116 case 8:
1117 address = r_read_ble64 (dump, ctx->anal->big_endian);
1118 break;
1119 default:
1120 r_warn_if_reached (); /* weird addr_size */
1121 return NULL;
1122 }
1123 kind = LOCATION_GLOBAL; // address
1124 break;
1125 }
1126 case DW_OP_call_frame_cfa: {
1127 // REMOVE XXX
1128 kind = LOCATION_BP;
1129 offset += 16;
1130 break;
1131 }
1132 default:
1133 break;
1134 }
1135 }
1136 if (kind == LOCATION_UNKNOWN) {
1137 return NULL;
1138 }
1139 VariableLocation *location = R_NEW0 (VariableLocation);
1140 if (location) {
1141 location->reg_name = reg_name;
1142 location->reg_num = reg_num;
1143 location->kind = kind;
1144 location->offset = offset;
1145 location->address = address;
1146 }
1147 return location;
1148 }
1149
parse_function_args_and_vars(Context * ctx,ut64 idx,RStrBuf * args,RList * variables)1150 static st32 parse_function_args_and_vars(Context *ctx, ut64 idx, RStrBuf *args, RList/*<Variable*>*/ *variables) {
1151 const RBinDwarfDie *die = &ctx->all_dies[idx++];
1152
1153 if (die->has_children) {
1154 int child_depth = 1;
1155
1156 bool get_linkage_name = prefer_linkage_name (ctx->lang);
1157 bool has_linkage_name = false;
1158 int argNumber = 1;
1159 size_t j;
1160 for (j = idx; child_depth > 0 && j < ctx->count; j++) {
1161 const RBinDwarfDie *child_die = &ctx->all_dies[j];
1162 RStrBuf type;
1163 r_strbuf_init (&type);
1164 const char *name = NULL;
1165 if (child_die->tag == DW_TAG_formal_parameter || child_die->tag == DW_TAG_variable) {
1166 Variable *var = R_NEW0 (Variable);
1167 size_t i;
1168 for (i = 0; i < child_die->count; i++) {
1169 const RBinDwarfAttrValue *val = &child_die->attr_values[i];
1170 switch (val->attr_name) {
1171 case DW_AT_name:
1172 if (!get_linkage_name || !has_linkage_name) {
1173 name = val->string.content;
1174 }
1175 break;
1176 case DW_AT_linkage_name:
1177 case DW_AT_MIPS_linkage_name:
1178 name = val->string.content;
1179 has_linkage_name = true;
1180 break;
1181 case DW_AT_type:
1182 parse_type (ctx, val->reference, &type, NULL);
1183 break;
1184 // abstract origin is supposed to have omitted information
1185 case DW_AT_abstract_origin:
1186 parse_abstract_origin (ctx, val->reference, &type, &name);
1187 break;
1188 case DW_AT_location:
1189 var->location = parse_dwarf_location (ctx, val, find_attr (die, DW_AT_frame_base));
1190 break;
1191 default:
1192 break;
1193 }
1194 }
1195 if (child_die->tag == DW_TAG_formal_parameter && child_depth == 1) {
1196 /* arguments sometimes have only type, create generic argX */
1197 if (type.len) {
1198 if (!name) {
1199 var->name = r_str_newf ("arg%d", argNumber);
1200 } else {
1201 var->name = strdup (name);
1202 }
1203 r_strbuf_appendf (args, "%s %s,", r_strbuf_get (&type), var->name);
1204 var->type = strdup (r_strbuf_get (&type));
1205 r_list_append (variables, var);
1206 } else {
1207 variable_free (var);
1208 }
1209 argNumber++;
1210 } else { /* DW_TAG_variable */
1211 if (name && type.len) {
1212 var->name = strdup (name);
1213 var->type = strdup (r_strbuf_get (&type));
1214 r_list_append (variables, var);
1215 } else {
1216 variable_free (var);
1217 }
1218 r_strbuf_fini (&type);
1219 }
1220 } else if (child_depth == 1 && child_die->tag == DW_TAG_unspecified_parameters) {
1221 r_strbuf_appendf (args, "va_args ...,");
1222 }
1223 if (child_die->has_children) {
1224 child_depth++;
1225 }
1226 if (child_die->abbrev_code == 0) { /* sibling list is terminated by null entry */
1227 child_depth--;
1228 }
1229 r_strbuf_fini (&type);
1230 }
1231 if (args->len > 0) {
1232 r_strbuf_slice (args, 0, args->len - 1);
1233 }
1234 }
1235 return 0;
1236 }
1237
/**
 * @brief Saves a parsed function and its located variables into the Sdb
 *
 * Keys written (SNAME is the sanitized function name):
 *   SNAME               = "fcn"
 *   fcn.SNAME.addr      = function address in hexadecimal
 *   fcn.SNAME.name      = function name without sanitization
 *   fcn.SNAME.sig       = function signature
 *   fcn.SNAME.var.VAR   = "kind,offset|address|register name,type"
 *   fcn.SNAME.vars      = comma separated names of the saved variables
 *
 * Variables without a location, or with an unknown location kind, are skipped.
 *
 * @param dwarf_fcn function to save, name and signature have to be set
 * @param variables parsed variables of the function
 * @param sdb
 */
static void sdb_save_dwarf_function(Function *dwarf_fcn, RList/*<Variable*>*/ *variables, Sdb *sdb) {
	char *sname = r_str_sanitize_sdb_key (dwarf_fcn->name);
	if (!sname) {
		/* no usable key (e.g. failed allocation), nothing can be stored */
		return;
	}
	sdb_set (sdb, sname, "fcn", 0);

	char *key = r_str_newf ("fcn.%s.addr", sname);
	char *val = r_str_newf ("0x%" PFMT64x "", dwarf_fcn->addr);
	sdb_set (sdb, key, val, 0);
	free (key);
	free (val);

	/* so we can have name without sanitization */
	key = r_str_newf ("fcn.%s.name", sname);
	sdb_set (sdb, key, dwarf_fcn->name, 0);
	free (key);

	key = r_str_newf ("fcn.%s.sig", sname);
	sdb_set (sdb, key, dwarf_fcn->signature, 0);
	free (key);

	RStrBuf vars;
	r_strbuf_init (&vars);
	RListIter *iter;
	Variable *var;
	r_list_foreach (variables, iter, var) {
		const VariableLocation *loc = var->location;
		if (!loc) {
			/* NULL location probably means optimized out, maybe put a comment there */
			continue;
		}
		/* value = "storage kind, additional info based on storage, type" */
		char *var_val = NULL;
		switch (loc->kind) {
		case LOCATION_BP: /* offset from the base pointer */
			var_val = r_str_newf ("%s,%" PFMT64d ",%s", "b", loc->offset, var->type);
			break;
		case LOCATION_SP: /* offset from the stack pointer */
			var_val = r_str_newf ("%s,%" PFMT64d ",%s", "s", loc->offset, var->type);
			break;
		case LOCATION_GLOBAL: /* fixed address */
			var_val = r_str_newf ("%s,%" PFMT64u ",%s", "g", loc->address, var->type);
			break;
		case LOCATION_REGISTER: /* register name */
			var_val = r_str_newf ("%s,%s,%s", "r", loc->reg_name, var->type);
			break;
		default:
			/* else location is unknown (optimized out), skip the var */
			continue;
		}
		r_strbuf_appendf (&vars, "%s,", var->name);
		char *var_key = r_str_newf ("fcn.%s.var.%s", sname, var->name);
		sdb_set (sdb, var_key, var_val, 0);
		free (var_key);
		free (var_val);
	}
	if (vars.len > 0) { /* remove the extra , */
		r_strbuf_slice (&vars, 0, vars.len - 1);
	}
	key = r_str_newf ("fcn.%s.vars", sname);
	sdb_set (sdb, key, r_strbuf_get (&vars), 0);
	free (key);
	r_strbuf_fini (&vars);
	free (sname);
}
1326
1327 /**
1328 * @brief Parse function,it's arguments, variables and
1329 * save the information into the Sdb
1330 *
1331 * @param ctx
1332 * @param idx Current entry index
1333 */
parse_function(Context * ctx,ut64 idx)1334 static void parse_function(Context *ctx, ut64 idx) {
1335 const RBinDwarfDie *die = &ctx->all_dies[idx];
1336
1337 Function fcn = { 0 };
1338 bool has_linkage_name = false;
1339 bool get_linkage_name = prefer_linkage_name (ctx->lang);
1340 RStrBuf ret_type;
1341 r_strbuf_init (&ret_type);
1342 if (find_attr_idx (die, DW_AT_declaration) != -1) {
1343 return; /* just declaration skip */
1344 }
1345 size_t i;
1346 /* For rust binaries prefer regular name not linkage TODO */
1347 for (i = 0; i < die->count; i++) {
1348 RBinDwarfAttrValue *val = &die->attr_values[i];
1349 switch (die->attr_values[i].attr_name) {
1350 case DW_AT_name:
1351 if (!get_linkage_name || !has_linkage_name) {
1352 fcn.name = val->string.content;
1353 }
1354 break;
1355 case DW_AT_linkage_name:
1356 case DW_AT_MIPS_linkage_name:
1357 fcn.name = val->string.content;
1358 has_linkage_name = true;
1359 break;
1360 case DW_AT_low_pc:
1361 case DW_AT_entry_pc:
1362 fcn.addr = val->address;
1363 break;
1364 case DW_AT_specification: /* reference to declaration DIE with more info */
1365 {
1366 RBinDwarfDie *spec_die = ht_up_find (ctx->die_map, val->reference, NULL);
1367 if (spec_die) {
1368 fcn.name = get_specification_die_name (spec_die); /* I assume that if specification has a name, this DIE hasn't */
1369 get_spec_die_type (ctx, spec_die, &ret_type);
1370 }
1371 break;
1372 }
1373 case DW_AT_type:
1374 parse_type (ctx, val->reference, &ret_type, NULL);
1375 break;
1376 case DW_AT_virtuality:
1377 fcn.is_method = true; /* method specific attr */
1378 fcn.is_virtual = true;
1379 break;
1380 case DW_AT_object_pointer:
1381 fcn.is_method = true;
1382 break;
1383 case DW_AT_vtable_elem_location:
1384 fcn.is_method = true;
1385 fcn.vtable_addr = 0; /* TODO we might use this information */
1386 break;
1387 case DW_AT_accessibility:
1388 fcn.is_method = true;
1389 fcn.access = (ut8)val->uconstant;
1390 break;
1391 case DW_AT_external:
1392 fcn.is_external = true;
1393 break;
1394 case DW_AT_trampoline:
1395 fcn.is_trampoline = true;
1396 break;
1397 case DW_AT_ranges:
1398 case DW_AT_high_pc:
1399 default:
1400 break;
1401 }
1402 }
1403 if (!fcn.name || !fcn.addr) { /* we need a name, faddr */
1404 goto cleanup;
1405 }
1406 RStrBuf args;
1407 r_strbuf_init (&args);
1408 /* TODO do the same for arguments in future so we can use their location */
1409 RList/*<Variable*>*/ *variables = r_list_new ();
1410 parse_function_args_and_vars (ctx, idx, &args, variables);
1411
1412 if (ret_type.len == 0) { /* DW_AT_type is omitted in case of `void` ret type */
1413 r_strbuf_append (&ret_type, "void");
1414 }
1415 r_warn_if_fail (ctx->lang);
1416 char *new_name = ctx->anal->binb.demangle (NULL, ctx->lang, fcn.name, fcn.addr, false);
1417 fcn.name = new_name ? new_name : strdup (fcn.name);
1418 fcn.signature = r_str_newf ("%s %s(%s);", r_strbuf_get (&ret_type), fcn.name, r_strbuf_get (&args));
1419 sdb_save_dwarf_function (&fcn, variables, ctx->sdb);
1420
1421 free ((char *)fcn.signature);
1422 free ((char *)fcn.name);
1423
1424 RListIter *iter;
1425 Variable *var;
1426 r_list_foreach (variables, iter, var) {
1427 variable_free (var);
1428 }
1429 r_list_free (variables);
1430 r_strbuf_fini (&args);
1431 cleanup:
1432 r_strbuf_fini (&ret_type);
1433 }
1434
1435 /**
1436 * @brief Get's language from comp unit for demangling
1437 *
1438 * @param die
1439 * @return char* string literal language represantation for demangling BinDemangle
1440 */
parse_comp_unit_lang(const RBinDwarfDie * die)1441 static char *parse_comp_unit_lang(const RBinDwarfDie *die) {
1442 r_return_val_if_fail (die, NULL);
1443
1444 int idx = find_attr_idx (die, DW_AT_language);
1445 char *lang = "cxx"; // default fallback
1446 if (idx == -1) {
1447 /* What to do now, it should have one?, just assume C++ */
1448 return lang;
1449 }
1450 const RBinDwarfAttrValue *val = &die->attr_values[idx];
1451 r_warn_if_fail (val->kind == DW_AT_KIND_CONSTANT);
1452
1453 switch (val->uconstant)
1454 {
1455 case DW_LANG_Java:
1456 return "java";
1457 case DW_LANG_ObjC:
1458 /* subideal, TODO research if dwarf gives me enough info to properly separate C++ and ObjC mangling */
1459 case DW_LANG_ObjC_plus_plus:
1460 return "objc";
1461 case DW_LANG_D:
1462 return "dlang";
1463 case DW_LANG_Rust:
1464 return "rust";
1465 case DW_LANG_C_plus_plus:
1466 case DW_LANG_C_plus_plus_14:
1467 /* no demangling available */
1468 case DW_LANG_Ada83:
1469 case DW_LANG_Cobol74:
1470 case DW_LANG_Cobol85:
1471 case DW_LANG_Fortran77:
1472 case DW_LANG_Fortran90:
1473 case DW_LANG_Pascal83:
1474 case DW_LANG_Modula2:
1475 case DW_LANG_Ada95:
1476 case DW_LANG_Fortran95:
1477 case DW_LANG_PLI:
1478 case DW_LANG_Python:
1479 case DW_LANG_Swift:
1480 case DW_LANG_Julia:
1481 case DW_LANG_Dylan:
1482 case DW_LANG_Fortran03:
1483 case DW_LANG_Fortran08:
1484 case DW_LANG_UPC:
1485 case DW_LANG_C:
1486 case DW_LANG_C89:
1487 case DW_LANG_C99:
1488 case DW_LANG_C11:
1489 default:
1490 return lang;
1491 }
1492 return lang;
1493 }
1494
1495 /**
1496 * @brief Delegates DIE to it's proper parsing method
1497 *
1498 * @param ctx
1499 * @param idx index of the current entry
1500 */
parse_type_entry(Context * ctx,ut64 idx)1501 static void parse_type_entry(Context *ctx, ut64 idx) {
1502 r_return_if_fail (ctx);
1503
1504 const RBinDwarfDie *die = &ctx->all_dies[idx];
1505 switch (die->tag) {
1506 case DW_TAG_structure_type:
1507 case DW_TAG_union_type:
1508 case DW_TAG_class_type:
1509 parse_structure_type (ctx, idx);
1510 break;
1511 case DW_TAG_enumeration_type:
1512 parse_enum_type (ctx, idx);
1513 break;
1514 case DW_TAG_typedef:
1515 parse_typedef (ctx, idx);
1516 break;
1517 case DW_TAG_base_type:
1518 parse_atomic_type (ctx, idx);
1519 break;
1520 case DW_TAG_subprogram:
1521 parse_function (ctx, idx);
1522 break;
1523 case DW_TAG_compile_unit:
1524 /* used for name demangling */
1525 ctx->lang = parse_comp_unit_lang (die);
1526 default:
1527 break;
1528 }
1529 }
1530
1531 /**
1532 * @brief Parses type and function information out of DWARF entries
1533 * and stores them to the sdb for further use
1534 *
1535 * @param anal
1536 * @param ctx
1537 */
r_anal_dwarf_process_info(const RAnal * anal,RAnalDwarfContext * ctx)1538 R_API void r_anal_dwarf_process_info(const RAnal *anal, RAnalDwarfContext *ctx) {
1539 r_return_if_fail (ctx && anal);
1540 Sdb *dwarf_sdb = sdb_ns (anal->sdb, "dwarf", 1);
1541 size_t i, j;
1542 const RBinDwarfDebugInfo *info = ctx->info;
1543 for (i = 0; i < info->count; i++) {
1544 RBinDwarfCompUnit *unit = &info->comp_units[i];
1545 Context dw_context = { // context per unit?
1546 .anal = anal,
1547 .all_dies = unit->dies,
1548 .count = unit->count,
1549 .die_map = info->lookup_table,
1550 .sdb = dwarf_sdb,
1551 .locations = ctx->loc,
1552 .lang = NULL
1553 };
1554 for (j = 0; j < unit->count; j++) {
1555 parse_type_entry (&dw_context, j);
1556 }
1557 }
1558 }
1559
/* Sdb filter callback, accepts only the entries whose value is exactly "fcn";
   the user pointer and the key are not used */
bool filter_sdb_function_names(void *user, const char *k, const char *v) {
	(void) k;
	(void) user;
	const bool is_fcn_entry = strcmp (v, "fcn") == 0;
	return is_fcn_entry;
}
1565
1566 /**
1567 * @brief Use parsed DWARF function info from Sdb in the anal functions
1568 * XXX right now we only save parsed name and variables, we can't use signature now
1569 * XXX refactor to be more readable
1570 * @param anal
1571 * @param dwarf_sdb
1572 */
r_anal_dwarf_integrate_functions(RAnal * anal,RFlag * flags,Sdb * dwarf_sdb)1573 R_API void r_anal_dwarf_integrate_functions(RAnal *anal, RFlag *flags, Sdb *dwarf_sdb) {
1574 r_return_if_fail (anal && dwarf_sdb);
1575
1576 /* get all entries with value == func */
1577 SdbList *sdb_list = sdb_foreach_list_filter (dwarf_sdb, filter_sdb_function_names, false);
1578 SdbListIter *it;
1579 SdbKv *kv;
1580 /* iterate all function entries */
1581 ls_foreach (sdb_list, it, kv) {
1582 char *func_sname = kv->base.key;
1583
1584 char *addr_key = r_str_newf ("fcn.%s.addr", func_sname);
1585 ut64 faddr = sdb_num_get (dwarf_sdb, addr_key, 0);
1586 free (addr_key);
1587
1588 /* if the function is analyzed so we can edit */
1589 RAnalFunction *fcn = r_anal_get_function_at (anal, faddr);
1590 if (fcn) {
1591 /* prepend dwarf debug info stuff with dbg. */
1592 char *real_name_key = r_str_newf ("fcn.%s.name", func_sname);
1593 char *real_name = sdb_get (dwarf_sdb, real_name_key, 0);
1594 free (real_name_key);
1595
1596 char *dwf_name = r_str_newf ("dbg.%s", real_name);
1597 free (real_name);
1598
1599 r_anal_function_rename (fcn, dwf_name);
1600 free (dwf_name);
1601
1602 char *tmp = r_str_newf ("fcn.%s.sig", func_sname);
1603 char *fcnstr = sdb_get (dwarf_sdb, tmp, 0);
1604 free (tmp);
1605 /* Apply signature as a comment at a function address */
1606 r_meta_set_string (anal, R_META_TYPE_COMMENT, faddr, fcnstr);
1607 free (fcnstr);
1608 }
1609 char *var_names_key = r_str_newf ("fcn.%s.vars", func_sname);
1610 char *vars = sdb_get (dwarf_sdb, var_names_key, NULL);
1611 char *var_name;
1612 sdb_aforeach (var_name, vars) {
1613 char *var_key = r_str_newf ("fcn.%s.var.%s", func_sname, var_name);
1614 char *var_data = sdb_get (dwarf_sdb, var_key, NULL);
1615 if (!var_data) {
1616 goto loop_end;
1617 }
1618 char *extra = NULL;
1619 char *kind = sdb_anext (var_data, &extra);
1620 char *type = NULL;
1621 extra = sdb_anext (extra, &type);
1622 st64 offset = 0;
1623 if (*kind != 'r') {
1624 offset = strtol (extra, NULL, 10);
1625 }
1626 if (*kind == 'g') { /* global, fixed addr TODO add size to variables? */
1627 char *global_name = r_str_newf ("global_%s", var_name);
1628 r_flag_unset_off (flags, offset);
1629 r_flag_set_next (flags, global_name, offset, 4);
1630 free (global_name);
1631 } else if (*kind == 's' && fcn) {
1632 r_anal_function_set_var (fcn, offset - fcn->maxstack, *kind, type, 4, false, var_name);
1633 } else if (*kind == 'r' && fcn) {
1634 RRegItem *i = r_reg_get (anal->reg, extra, -1);
1635 if (!i) {
1636 goto loop_end;
1637 }
1638 r_anal_function_set_var (fcn, i->index, *kind, type, 4, false, var_name);
1639 } else if (fcn) { /* kind == 'b' */
1640 r_anal_function_set_var (fcn, offset - fcn->bp_off, *kind, type, 4, false, var_name);
1641 }
1642 free (var_key);
1643 free (var_data);
1644 loop_end:
1645 sdb_aforeach_next (var_name);
1646 }
1647 free (var_names_key);
1648 free (vars);
1649 }
1650 ls_free (sdb_list);
1651 }
1652