1 #include "Introspection.h"
2
3 #ifdef WITH_INTROSPECTION
4
5 #include "Debug.h"
6 #include "Error.h"
7 #include "LLVM_Headers.h"
8 #include "Util.h"
9
10 #include <iostream>
11 #include <sstream>
12 #include <stdio.h>
13 #include <string>
14
15 // defines backtrace, which gets the call stack as instruction pointers
16 #include <execinfo.h>
17
18 #include <regex>
19
20 using std::map;
21 using std::pair;
22 using std::vector;
23
24 namespace Halide {
25 namespace Internal {
26 namespace Introspection {
27
28 // All of this only works with DWARF debug info on linux and OS X. For
29 // other platforms, WITH_INTROSPECTION should be off.
30 #ifdef __APPLE__
31 extern "C" void _NSGetExecutablePath(char *, int32_t *);
get_program_name(char * name,int32_t size)32 void get_program_name(char *name, int32_t size) {
33 _NSGetExecutablePath(name, &size);
34 }
35 #else
36 // glibc defines the binary name for us
37 extern "C" char *program_invocation_name;
38 void get_program_name(char *name, int32_t size) {
39 strncpy(name, program_invocation_name, size);
40 }
41 #endif
42
43 namespace {
44
45 template<typename T>
load_misaligned(const T * p)46 inline T load_misaligned(const T *p) {
47 T result;
48 memcpy(&result, p, sizeof(T));
49 return result;
50 }
51
52 #if LLVM_VERSION >= 100
53 typedef uint64_t llvm_offset_t;
54 #else
55 typedef uint32_t llvm_offset_t;
56 #endif
57
58 } // namespace
59
60 class DebugSections {
61
62 bool calibrated;
63
64 struct FieldFormat {
65 uint64_t name, form;
FieldFormatHalide::Internal::Introspection::DebugSections::FieldFormat66 FieldFormat()
67 : name(0), form(0) {
68 }
FieldFormatHalide::Internal::Introspection::DebugSections::FieldFormat69 FieldFormat(uint64_t n, uint64_t f)
70 : name(n), form(f) {
71 }
72 };
73
74 struct EntryFormat {
75 uint64_t code, tag;
76 bool has_children;
EntryFormatHalide::Internal::Introspection::DebugSections::EntryFormat77 EntryFormat()
78 : code(0), tag(0), has_children(false) {
79 }
80 vector<FieldFormat> fields;
81 };
82 vector<EntryFormat> entry_formats;
83
84 struct LiveRange {
85 uint64_t pc_begin, pc_end;
86 };
87
88 struct TypeInfo;
89
90 struct GlobalVariable {
91 std::string name;
92 TypeInfo *type;
93 uint64_t type_def_loc;
94 uint64_t def_loc, spec_loc;
95 uint64_t addr;
GlobalVariableHalide::Internal::Introspection::DebugSections::GlobalVariable96 GlobalVariable()
97 : name(""),
98 type(nullptr),
99 type_def_loc(0),
100 def_loc(0),
101 spec_loc(0),
102 addr(0) {
103 }
operator <Halide::Internal::Introspection::DebugSections::GlobalVariable104 bool operator<(const GlobalVariable &other) const {
105 return addr < other.addr;
106 }
107 };
108 vector<GlobalVariable> global_variables;
109
110 struct HeapObject {
111 uint64_t addr;
112 TypeInfo *type;
113 struct Member {
114 uint64_t addr;
115 std::string name;
116 TypeInfo *type;
operator <Halide::Internal::Introspection::DebugSections::HeapObject::Member117 bool operator<(const Member &other) const {
118 return addr < other.addr;
119 }
120 };
121 vector<Member> members;
122 };
123 map<uint64_t, HeapObject> heap_objects;
124
125 struct LocalVariable {
126 std::string name;
127 TypeInfo *type;
128 int stack_offset;
129 uint64_t type_def_loc;
130 uint64_t def_loc, origin_loc;
131 // Some local vars are only alive for certain address ranges
132 // (e.g. those inside a lexical block). If the ranges vector
133 // is empty, the variables are alive for the entire containing
134 // function.
135 vector<LiveRange> live_ranges;
LocalVariableHalide::Internal::Introspection::DebugSections::LocalVariable136 LocalVariable()
137 : name(""),
138 type(nullptr),
139 stack_offset(0),
140 type_def_loc(0),
141 def_loc(0),
142 origin_loc(0) {
143 }
144 };
145
146 struct FunctionInfo {
147 std::string name;
148 uint64_t pc_begin, pc_end;
149 vector<LocalVariable> variables;
150 uint64_t def_loc, spec_loc;
151 // The stack variable offsets are w.r.t either:
152 // gcc: the top of the stack frame (one below the return address to the caller)
153 // clang with frame pointers: the bottom of the stack frame (one above the return address to this function)
154 // clang without frame pointers: the top of the stack frame (...TODO...)
155 enum { Unknown = 0,
156 GCC,
157 ClangFP,
158 ClangNoFP } frame_base;
FunctionInfoHalide::Internal::Introspection::DebugSections::FunctionInfo159 FunctionInfo()
160 : name(""), pc_begin(0), pc_end(0), def_loc(0), spec_loc(0) {
161 }
162
operator <Halide::Internal::Introspection::DebugSections::FunctionInfo163 bool operator<(const FunctionInfo &other) const {
164 return pc_begin < other.pc_begin;
165 }
166 };
167 vector<FunctionInfo> functions;
168
169 vector<std::string> source_files;
170 struct LineInfo {
171 uint64_t pc;
172 uint32_t line;
173 uint32_t file; // Index into source_files
operator <Halide::Internal::Introspection::DebugSections::LineInfo174 bool operator<(const LineInfo &other) const {
175 return pc < other.pc;
176 }
177 };
178 vector<LineInfo> source_lines;
179
180 struct TypeInfo {
181 std::string name;
182 uint64_t size;
183 uint64_t def_loc;
184 vector<LocalVariable> members;
185
186 // TypeInfo can also be used to represent a pointer to
187 // another type, in which case there's a single member, which
188 // represents the value pointed to (its name is empty and its
189 // stack_offset is meaningless).
190 enum { Primitive,
191 Class,
192 Struct,
193 Pointer,
194 Typedef,
195 Const,
196 Reference,
197 Array } type;
198
TypeInfoHalide::Internal::Introspection::DebugSections::TypeInfo199 TypeInfo()
200 : size(0), def_loc(0), type(Primitive) {
201 }
202 };
203 vector<TypeInfo> types;
204
205 public:
206 bool working;
207
DebugSections(const std::string & binary)208 DebugSections(const std::string &binary)
209 : calibrated(false), working(false) {
210 std::string binary_path = binary;
211 #ifdef __APPLE__
212 size_t last_slash = binary_path.rfind('/');
213 if (last_slash == std::string::npos ||
214 last_slash >= binary_path.size() - 1) {
215 last_slash = 0;
216 } else {
217 last_slash++;
218 }
219 std::string file_only = binary_path.substr(last_slash, binary_path.size() - last_slash);
220 binary_path += ".dSYM/Contents/Resources/DWARF/" + file_only;
221 #endif
222
223 debug(5) << "Loading " << binary_path << "\n";
224
225 load_and_parse_object_file(binary_path);
226 }
227
count_trailing_zeros(int64_t x)228 int count_trailing_zeros(int64_t x) {
229 for (int i = 0; i < 64; i++) {
230 if (x & (1 << i)) {
231 return i;
232 }
233 }
234 return 64;
235 }
236
calibrate_pc_offset(void (* fn)())237 void calibrate_pc_offset(void (*fn)()) {
238 // Calibrate for the offset between the instruction pointers
239 // in the debug info and the instruction pointers in the
240 // actual file.
241 bool found = false;
242 uint64_t pc_real = (uint64_t)fn;
243 int64_t pc_adjust = 0;
244 for (size_t i = 0; i < functions.size(); i++) {
245 if (functions[i].name == "HalideIntrospectionCanary::offset_marker" &&
246 functions[i].pc_begin) {
247
248 uint64_t pc_debug = functions[i].pc_begin;
249
250 if (calibrated) {
251 // If we're already calibrated, we should find a function with a matching pc
252 if (pc_debug == pc_real) {
253 return;
254 }
255 } else {
256 int64_t pc_adj = pc_real - pc_debug;
257
258 // Offset must be a multiple of 4096
259 if (pc_adj & (4095)) {
260 continue;
261 }
262
263 // If we find multiple matches, pick the one with more trailing zeros
264 if (!found ||
265 count_trailing_zeros(pc_adj) > count_trailing_zeros(pc_adjust)) {
266 pc_adjust = pc_adj;
267 found = true;
268 }
269 }
270 }
271 }
272
273 if (!found) {
274 if (!calibrated) {
275 debug(2) << "Failed to find HalideIntrospectionCanary::offset_marker\n";
276 } else {
277 debug(2) << "Failed to find HalideIntrospectionCanary::offset_marker at the expected location\n";
278 }
279 working = false;
280 return;
281 }
282
283 debug(5) << "Program counter adjustment between debug info and actual code: " << pc_adjust << "\n";
284
285 for (size_t i = 0; i < functions.size(); i++) {
286 FunctionInfo &f = functions[i];
287 f.pc_begin += pc_adjust;
288 f.pc_end += pc_adjust;
289 for (size_t j = 0; j < f.variables.size(); j++) {
290 LocalVariable &v = f.variables[j];
291 for (size_t k = 0; k < v.live_ranges.size(); k++) {
292 v.live_ranges[k].pc_begin += pc_adjust;
293 v.live_ranges[k].pc_end += pc_adjust;
294 }
295 }
296 }
297
298 for (size_t i = 0; i < source_lines.size(); i++) {
299 source_lines[i].pc += pc_adjust;
300 }
301
302 for (size_t i = 0; i < global_variables.size(); i++) {
303 global_variables[i].addr += pc_adjust;
304 }
305
306 calibrated = true;
307 }
308
find_global_variable(const void * global_pointer)309 int find_global_variable(const void *global_pointer) {
310 if (global_variables.empty()) {
311 debug(5) << "Considering possible global at " << global_pointer << " but global_variables is empty\n";
312 return -1;
313 }
314 debug(5) << "Considering possible global at " << global_pointer << "\n";
315
316 debug(5) << "Known globals range from " << std::hex << global_variables.front().addr << " to " << global_variables.back().addr << std::dec << "\n";
317 uint64_t address = (uint64_t)(global_pointer);
318 size_t hi = global_variables.size();
319 size_t lo = 0;
320 while (hi > lo + 1) {
321 size_t mid = (hi + lo) / 2;
322 uint64_t addr_mid = global_variables[mid].addr;
323 if (address < addr_mid) {
324 hi = mid;
325 } else {
326 lo = mid;
327 }
328 }
329
330 if (lo >= global_variables.size()) {
331 return -1;
332 }
333
334 // There may be multiple matching addresses. Walk backwards to find the first one.
335 size_t idx = lo;
336 while (idx > 0 && global_variables[idx - 1].addr == global_variables[lo].addr) {
337 idx--;
338 }
339
340 // Check the address is indeed inside the object found
341 uint64_t end_ptr = global_variables[idx].addr;
342 TypeInfo *t = global_variables[idx].type;
343 if (t == nullptr) {
344 return -1;
345 }
346 uint64_t size = t->size;
347 while (t->type == TypeInfo::Array) {
348 t = t->members[0].type;
349 size *= t->size;
350 }
351 end_ptr += size;
352 if (address < global_variables[idx].addr ||
353 address >= end_ptr) {
354 return -1;
355 }
356
357 return (int)idx;
358 }
359
360 // Get the debug name of a global var from a pointer to it
get_global_variable_name(const void * global_pointer,const std::string & type_name="")361 std::string get_global_variable_name(const void *global_pointer, const std::string &type_name = "") {
362 // Find the index of the first global variable with this address
363 int idx = find_global_variable(global_pointer);
364
365 if (idx < 0) {
366 // No matching global variable found.
367 return "";
368 }
369
370 uint64_t address = (uint64_t)global_pointer;
371
372 std::regex re(type_name);
373
374 // Now test all of them
375 for (; (size_t)idx < global_variables.size() && global_variables[idx].addr <= address; idx++) {
376
377 GlobalVariable &v = global_variables[idx];
378 TypeInfo *elem_type = nullptr;
379 if (v.type && v.type->type == TypeInfo::Array && v.type->size) {
380 elem_type = v.type->members[0].type;
381 }
382
383 debug(5) << "Closest global is " << v.name << " at " << std::hex << v.addr << std::dec;
384 if (v.type) {
385 debug(5) << " with type " << v.type->name << "\n";
386 } else {
387 debug(5) << "\n";
388 }
389
390 if (v.addr == address &&
391 (type_name.empty() ||
392 (v.type && regex_match(v.type->name, re)))) {
393 return v.name;
394 } else if (elem_type && // Check if it's an array element
395 (type_name.empty() ||
396 (elem_type && regex_match(elem_type->name, re)))) {
397 int64_t array_size_bytes = v.type->size * elem_type->size;
398 int64_t pos_bytes = address - v.addr;
399 if (pos_bytes >= 0 &&
400 pos_bytes < array_size_bytes &&
401 pos_bytes % elem_type->size == 0) {
402 std::ostringstream oss;
403 oss << v.name << "[" << (pos_bytes / elem_type->size) << "]";
404 debug(5) << "Successful match to array element\n";
405 return oss.str();
406 } else {
407 debug(5) << "Failed match to array element: " << pos_bytes << " " << array_size_bytes << " " << elem_type->size << "\n";
408 }
409 }
410 }
411
412 // No match
413 return "";
414 }
415
register_heap_object(const void * obj,size_t size,const void * helper)416 void register_heap_object(const void *obj, size_t size, const void *helper) {
417 // helper should be a pointer to a global
418 int idx = find_global_variable(helper);
419 if (idx == -1) {
420 debug(5) << "Could not find helper object: " << helper << "\n";
421 return;
422 }
423 const GlobalVariable &ptr = global_variables[idx];
424 debug(5) << "helper object is " << ptr.name << " at " << std::hex << ptr.addr << std::dec;
425 if (ptr.type) {
426 debug(5) << " with type " << ptr.type->name << "\n";
427 } else {
428 debug(5) << " with unknown type!\n";
429 return;
430 }
431
432 internal_assert(ptr.type->type == TypeInfo::Pointer)
433 << "The type of the helper object was supposed to be a pointer\n";
434 internal_assert(ptr.type->members.size() == 1);
435 TypeInfo *object_type = ptr.type->members[0].type;
436 internal_assert(object_type);
437
438 debug(5) << "The object has type: " << object_type->name << "\n";
439
440 internal_assert(size == object_type->size);
441
442 HeapObject heap_object;
443 heap_object.type = object_type;
444 heap_object.addr = (uint64_t)obj;
445
446 // Recursively enumerate the members.
447 for (size_t i = 0; i < object_type->members.size(); i++) {
448 const LocalVariable &member_spec = object_type->members[i];
449 HeapObject::Member member;
450 member.name = member_spec.name;
451 member.type = member_spec.type;
452 member.addr = heap_object.addr + member_spec.stack_offset;
453 if (member.type) {
454 heap_object.members.push_back(member);
455 debug(5) << member.name << " - " << (int)(member.type->type) << "\n";
456 }
457 }
458
459 // Note that this loop pushes elements onto the vector it's
460 // iterating over as it goes - that's what makes the
461 // enumeration recursive.
462 for (size_t i = 0; i < heap_object.members.size(); i++) {
463 HeapObject::Member parent = heap_object.members[i];
464
465 // Stop at references or pointers. We could register them
466 // recursively (and basically write a garbage collector
467 // object tracker), but that's beyond the scope of what
468 // we're trying to do here. Besides, predicting the
469 // addresses of their children-of-children might follow a
470 // dangling pointer.
471 if (parent.type->type == TypeInfo::Pointer ||
472 parent.type->type == TypeInfo::Reference) continue;
473
474 for (size_t j = 0; j < parent.type->members.size(); j++) {
475 const LocalVariable &member_spec = parent.type->members[j];
476 TypeInfo *member_type = member_spec.type;
477
478 HeapObject::Member child;
479 child.type = member_type;
480
481 if (parent.type->type == TypeInfo::Typedef ||
482 parent.type->type == TypeInfo::Const) {
483 // We're just following a type modifier. It's still the same member.
484 child.name = parent.name;
485 } else if (parent.type->type == TypeInfo::Array) {
486 child.name = ""; // the '[index]' gets added in the query routine.
487 } else {
488 child.name = member_spec.name;
489 }
490
491 child.addr = parent.addr + member_spec.stack_offset;
492
493 if (child.type) {
494 debug(5) << child.name << " - " << (int)(child.type->type) << "\n";
495 heap_object.members.push_back(child);
496 }
497 }
498 }
499
500 // Sort by member address, but use stable stort so that parents stay before children.
501 std::stable_sort(heap_object.members.begin(), heap_object.members.end());
502
503 debug(5) << "Children of heap object of type " << object_type->name << " at " << obj << ":\n";
504 for (size_t i = 0; i < heap_object.members.size(); i++) {
505 const HeapObject::Member &mem = heap_object.members[i];
506 debug(5) << std::hex << mem.addr << std::dec << ": " << mem.type->name << " " << mem.name << "\n";
507 }
508
509 heap_objects[heap_object.addr] = heap_object;
510 }
511
deregister_heap_object(const void * obj,size_t size)512 void deregister_heap_object(const void *obj, size_t size) {
513 heap_objects.erase((uint64_t)obj);
514 }
515
516 // Get the debug name of a member of a heap variable from a pointer to it
get_heap_member_name(const void * ptr,const std::string & type_name="")517 std::string get_heap_member_name(const void *ptr, const std::string &type_name = "") {
518 debug(5) << "Getting heap member name of " << ptr << "\n";
519
520 if (heap_objects.empty()) {
521 debug(5) << "No registered heap objects\n";
522 return "";
523 }
524
525 uint64_t addr = (uint64_t)ptr;
526 std::map<uint64_t, HeapObject>::iterator it = heap_objects.upper_bound(addr);
527
528 if (it == heap_objects.begin()) {
529 debug(5) << "No heap objects less than this address\n";
530 return "";
531 }
532
533 // 'it' is the first element strictly greater than addr, so go
534 // back one to get less-than-or-equal-to.
535 it--;
536
537 const HeapObject &obj = it->second;
538 uint64_t object_start = it->first;
539 uint64_t object_end = object_start + obj.type->size;
540 if (addr < object_start || addr >= object_end) {
541 debug(5) << "Not contained in any heap object\n";
542 return "";
543 }
544
545 std::ostringstream name;
546
547 std::regex re(type_name);
548
549 // Look in the members for the appropriate offset.
550 for (size_t i = 0; i < obj.members.size(); i++) {
551 TypeInfo *t = obj.members[i].type;
552
553 if (!t) continue;
554
555 debug(5) << "Comparing to member " << obj.members[i].name
556 << " at address " << std::hex << obj.members[i].addr << std::dec
557 << " with type " << t->name
558 << " and type type " << (int)t->type << "\n";
559
560 if (obj.members[i].addr == addr &&
561 (type_name.empty() ||
562 regex_match(t->name, re))) {
563 name << obj.members[i].name;
564 return name.str();
565 }
566
567 // For arrays, we only unpacked the first element.
568 if (t->type == TypeInfo::Array) {
569 TypeInfo *elem_type = t->members[0].type;
570 uint64_t array_start_addr = obj.members[i].addr;
571 uint64_t array_end_addr = array_start_addr + t->size * elem_type->size;
572 debug(5) << "Array runs from " << std::hex << array_start_addr << " to " << array_end_addr << "\n";
573 if (elem_type && addr >= array_start_addr && addr < array_end_addr) {
574 // Adjust the query address backwards to lie
575 // within the first array element and remember the
576 // array index to correct the name later.
577 uint64_t containing_elem = (addr - array_start_addr) / elem_type->size;
578 addr -= containing_elem * elem_type->size;
579 debug(5) << "Query belongs to this array. Adjusting query address backwards to "
580 << std::hex << addr << std::dec << "\n";
581 name << obj.members[i].name << "[" << containing_elem << "]";
582 }
583 } else if (t->type == TypeInfo::Struct ||
584 t->type == TypeInfo::Class ||
585 t->type == TypeInfo::Primitive) {
586 // If I'm not this member, but am contained within it, incorporate its name.
587 uint64_t struct_start_addr = obj.members[i].addr;
588 uint64_t struct_end_addr = struct_start_addr + t->size;
589 debug(5) << "Struct runs from " << std::hex << struct_start_addr << " to " << struct_end_addr << "\n";
590 if (addr >= struct_start_addr && addr < struct_end_addr) {
591 name << obj.members[i].name << ".";
592 }
593 }
594 }
595
596 debug(5) << "Didn't seem to be any of the members of this heap object\n";
597 return "";
598 }
599
600 // Get the debug name of a stack variable from a pointer to it
get_stack_variable_name(const void * stack_pointer,const std::string & type_name="")601 std::string get_stack_variable_name(const void *stack_pointer, const std::string &type_name = "") {
602
603 // Check it's a plausible stack pointer
604 int marker = 0;
605 uint64_t marker_addr = (uint64_t)▮
606 uint64_t top_of_stack;
607 if (marker_addr >> 63) {
608 top_of_stack = (uint64_t)(-1);
609 } else {
610 // Conservatively assume top of stack is first multiple of
611 // 1GB larger than the marker (seriously, who allocates
612 // 1GB of stack space?).
613 top_of_stack = ((marker_addr >> 30) + 1) << 30;
614 }
615
616 if ((uint64_t)stack_pointer > top_of_stack ||
617 (uint64_t)stack_pointer < marker_addr) {
618 return "";
619 }
620
621 struct frame_info {
622 frame_info *frame_pointer;
623 void *return_address;
624 };
625
626 frame_info *fp = (frame_info *)__builtin_frame_address(0);
627 frame_info *next_fp = nullptr;
628
629 // Walk up the stack until we pass the pointer.
630 debug(5) << "Walking up the stack\n";
631 while (fp < stack_pointer) {
632 debug(5) << "frame pointer: " << (void *)(fp->frame_pointer)
633 << " return address: " << fp->return_address << "\n";
634 next_fp = fp;
635 if (fp->frame_pointer < fp) {
636 // If we ever jump downwards, something is
637 // wrong. Maybe this was a heap pointer.
638 debug(5) << "Bailing out because fp decreased\n";
639 return "";
640 }
641 fp = fp->frame_pointer;
642 if (fp < (void *)&marker) {
643 // If we're still below the marker after one hop,
644 // something is wrong. Maybe this was a heap pointer.
645 debug(5) << "Bailing out because we're below the marker\n";
646 return "";
647 }
648 }
649
650 if (!next_fp) {
651 // If we didn't manage to walk up one frame, something is
652 // wrong. Maybe this was a heap pointer.
653 debug(5) << "Bailing out because we didn't even walk up one frame\n";
654 return "";
655 }
656
657 // It's a stack variable in the function containing address
658 // next_fp->return_address
659
660 // Get the program counter at the position of the call
661 uint64_t pc = (uint64_t)(next_fp->return_address) - 5; // -5 for the callq instruction
662
663 FunctionInfo *func = find_containing_function(next_fp->return_address);
664
665 if (!func) {
666 debug(5) << "Bailing out because we couldn't find the containing function\n";
667 return "";
668 }
669
670 // Now what is its offset in that function's frame? The return
671 // address is always at the top of the frame.
672 int offset_above = (int)((int64_t)(stack_pointer) - (int64_t)(fp));
673 int offset_below = (int)((int64_t)(stack_pointer) - (int64_t)(next_fp));
674
675 const int addr_size = sizeof(void *);
676
677 int offset;
678 if (func->frame_base == FunctionInfo::GCC) {
679 offset = offset_above - 2 * addr_size;
680 } else if (func->frame_base == FunctionInfo::ClangFP) {
681 offset = offset_above;
682 } else if (func->frame_base == FunctionInfo::ClangNoFP) {
683 offset = offset_below - 2 * addr_size;
684 } else {
685 debug(5) << "Bailing out because containing function used an unknown mechanism for specifying stack offsets\n";
686 return "";
687 }
688
689 debug(5) << "Searching for var at offset " << offset << "\n";
690
691 std::regex re(type_name);
692
693 for (size_t j = 0; j < func->variables.size(); j++) {
694 const LocalVariable &var = func->variables[j];
695 debug(5) << "Var " << var.name << " is at offset " << var.stack_offset << "\n";
696
697 // Reject it if we're not in its live ranges
698 if (var.live_ranges.size()) {
699 bool in_live_range = false;
700 for (size_t i = 0; i < var.live_ranges.size(); i++) {
701 if (pc >= var.live_ranges[i].pc_begin &&
702 pc < var.live_ranges[i].pc_end) {
703 in_live_range = true;
704 break;
705 }
706 }
707 if (!in_live_range) {
708 debug(5) << "Skipping var because we're not in any of its live ranges\n";
709 continue;
710 }
711 }
712
713 TypeInfo *type = var.type;
714 TypeInfo *elem_type = nullptr;
715 if (type && type->type == TypeInfo::Array && type->size) {
716 elem_type = type->members[0].type;
717 }
718
719 if (offset == var.stack_offset && var.type) {
720 debug(5) << "Considering match: " << var.type->name << ", " << var.name << "\n";
721 }
722
723 if (offset == var.stack_offset &&
724 (type_name.empty() ||
725 (type && regex_match(type->name, re)))) {
726 debug(5) << "Successful match to scalar var\n";
727 return var.name;
728 } else if (elem_type && // Check if it's an array element
729 (type_name.empty() ||
730 (elem_type && // Check the type matches
731 regex_match(elem_type->name, re)))) {
732 int64_t array_size_bytes = type->size * elem_type->size;
733 int64_t pos_bytes = offset - var.stack_offset;
734 if (pos_bytes >= 0 &&
735 pos_bytes < array_size_bytes &&
736 pos_bytes % elem_type->size == 0) {
737 std::ostringstream oss;
738 oss << var.name << "[" << (pos_bytes / elem_type->size) << "]";
739 debug(5) << "Successful match to array element\n";
740 return oss.str();
741 } else {
742 debug(5) << "No match to array element: " << type->size << " " << array_size_bytes << " " << pos_bytes << " " << elem_type->size << "\n";
743 }
744 }
745 }
746
747 debug(5) << "Failed to find variable at the matching offset with the given type\n";
748 return "";
749 }
750
751 // Look up n stack frames and get the source location as filename:line
get_source_location()752 std::string get_source_location() {
753 debug(5) << "Finding source location\n";
754
755 if (!source_lines.size()) {
756 debug(5) << "Bailing out because we have no source lines\n";
757 return "";
758 }
759
760 const int max_stack_frames = 256;
761
762 // Get the backtrace
763 vector<void *> trace(max_stack_frames);
764 int trace_size = backtrace(&trace[0], (int)(trace.size()));
765
766 for (int frame = 2; frame < trace_size; frame++) {
767 uint64_t address = (uint64_t)trace[frame];
768
769 debug(5) << "Considering address " << ((void *)address) << "\n";
770
771 const uint8_t *inst_ptr = (const uint8_t *)address;
772 if (inst_ptr[-5] == 0xe8) {
773 // The actual address of the call is probably 5 bytes
774 // earlier (using callq with an immediate address)
775 address -= 5;
776 } else if (inst_ptr[-2] == 0xff) {
777 // Or maybe it's 2 bytes earlier (using callq with a
778 // register address)
779 address -= 2;
780 } else {
781 debug(5) << "Skipping function because there's no callq before " << (const void *)(inst_ptr) << "\n";
782 continue;
783 }
784
785 // Binary search into functions
786 FunctionInfo *f = find_containing_function((void *)address);
787
788 // If no debug info for this function, we must still be
789 // inside libHalide. Continue searching upwards.
790 if (!f) {
791 debug(5) << "Skipping function because we have no debug info for it\n";
792 continue;
793 }
794
795 debug(5) << "Containing function is " << f->name << "\n";
796
797 // If we're still in the Halide namespace, continue searching
798 if (f->name.size() > 8 &&
799 f->name.substr(0, 8) == "Halide::") {
800 debug(5) << "Skipping function because it's in the Halide namespace\n";
801 continue;
802 }
803
804 // Binary search into source_lines
805 size_t hi = source_lines.size();
806 size_t lo = 0;
807 while (hi > lo + 1) {
808 size_t mid = (hi + lo) / 2;
809 uint64_t pc_mid = source_lines[mid].pc;
810 if (address < pc_mid) {
811 hi = mid;
812 } else {
813 lo = mid;
814 }
815 }
816
817 const std::string &file = source_files[source_lines[lo].file];
818 int line = source_lines[lo].line;
819
820 std::ostringstream oss;
821 oss << file << ":" << line;
822
823 debug(5) << "Source location is " << oss.str() << "\n";
824
825 return oss.str();
826 }
827
828 debug(5) << "Bailing out because we reached the end of the backtrace\n";
829 return "";
830 }
831
dump()832 void dump() {
833 // Dump all the types
834 for (size_t i = 0; i < types.size(); i++) {
835 printf("Class %s of size %llu @ %llx: \n",
836 types[i].name.c_str(),
837 (unsigned long long)(types[i].size),
838 (unsigned long long)(types[i].def_loc));
839 for (size_t j = 0; j < types[i].members.size(); j++) {
840 TypeInfo *c = types[i].members[j].type;
841 const char *type_name = "(unknown)";
842 if (c) {
843 type_name = c->name.c_str();
844 }
845 printf(" Member %s at %d of type %s @ %llx\n",
846 types[i].members[j].name.c_str(),
847 types[i].members[j].stack_offset,
848 type_name,
849 (long long unsigned)types[i].members[j].type_def_loc);
850 }
851 }
852
853 // Dump all the functions and their local variables
854 for (size_t i = 0; i < functions.size(); i++) {
855 const FunctionInfo &f = functions[i];
856 printf("Function %s at %llx - %llx (frame_base %d): \n",
857 f.name.c_str(),
858 (unsigned long long)(f.pc_begin),
859 (unsigned long long)(f.pc_end),
860 (int)f.frame_base);
861 for (size_t j = 0; j < f.variables.size(); j++) {
862 const LocalVariable &v = f.variables[j];
863 TypeInfo *c = v.type;
864 const char *type_name = "(unknown)";
865 if (c) {
866 type_name = c->name.c_str();
867 }
868 printf(" Variable %s at %d of type %s @ %llx\n",
869 v.name.c_str(),
870 v.stack_offset,
871 type_name,
872 (long long unsigned)v.type_def_loc);
873 for (size_t k = 0; k < v.live_ranges.size(); k++) {
874 printf(" Live range: %llx - %llx\n",
875 (unsigned long long)v.live_ranges[k].pc_begin,
876 (unsigned long long)v.live_ranges[k].pc_end);
877 }
878 }
879 }
880
881 // Dump the pc -> source file relationship
882 for (size_t i = 0; i < source_lines.size(); i++) {
883 printf("%p -> %s:%d\n",
884 (void *)(source_lines[i].pc),
885 source_files[source_lines[i].file].c_str(),
886 source_lines[i].line);
887 }
888
889 // Dump the global variables
890 for (size_t i = 0; i < global_variables.size(); i++) {
891 const GlobalVariable &v = global_variables[i];
892 TypeInfo *c = v.type;
893 const char *type_name = "(unknown)";
894 if (c) {
895 type_name = c->name.c_str();
896 }
897 printf(" Global variable %s at %llx of type %s\n",
898 v.name.c_str(),
899 (long long unsigned)v.addr,
900 type_name);
901 }
902 }
903
dump_stack_frame(void * ptr)904 bool dump_stack_frame(void *ptr) {
905 FunctionInfo *fi = find_containing_function(ptr);
906 if (fi == nullptr) {
907 debug(0) << "Failed to find function containing " << ptr << " in debug info\n";
908 return false;
909 }
910
911 debug(0) << fi->name << ":\n";
912 for (const LocalVariable &v : fi->variables) {
913 TypeInfo *t = v.type;
914 debug(0) << " ";
915 if (t) {
916 debug(0) << t->name << " ";
917 } else {
918 debug(0) << "(unknown type) ";
919 }
920 debug(0) << v.name << " @ " << v.stack_offset << "\n";
921 }
922 return true;
923 }
924
925 private:
load_and_parse_object_file(const std::string & binary)926 void load_and_parse_object_file(const std::string &binary) {
927 llvm::object::ObjectFile *obj = nullptr;
928
929 // Open the object file in question.
930 llvm::Expected<llvm::object::OwningBinary<llvm::object::ObjectFile>> maybe_obj =
931 llvm::object::ObjectFile::createObjectFile(binary);
932
933 if (!maybe_obj) {
934 consumeError(maybe_obj.takeError());
935 debug(1) << "Failed to load binary:" << binary << "\n";
936 return;
937 }
938
939 obj = maybe_obj.get().getBinary();
940
941 if (obj) {
942 working = true;
943 parse_object_file(obj);
944 } else {
945 debug(1) << "Could not load object file: " << binary << "\n";
946 working = false;
947 }
948 }
949
parse_object_file(llvm::object::ObjectFile * obj)950 void parse_object_file(llvm::object::ObjectFile *obj) {
951 // Look for the debug_info, debug_abbrev, debug_line, and debug_str sections
952 llvm::StringRef debug_info, debug_abbrev, debug_str, debug_line, debug_ranges;
953
954 #ifdef __APPLE__
955 std::string prefix = "__";
956 #else
957 std::string prefix = ".";
958 #endif
959
960 for (llvm::object::section_iterator iter = obj->section_begin();
961 iter != obj->section_end(); ++iter) {
962 #if LLVM_VERSION >= 100
963 auto expected_name = iter->getName();
964 internal_assert(expected_name);
965 llvm::StringRef name = expected_name.get();
966 #else
967 llvm::StringRef name;
968 iter->getName(name);
969 #endif
970 debug(2) << "Section: " << name.str() << "\n";
971 // ignore errors, just leave strings empty
972 auto e = iter->getContents();
973 if (e) {
974 if (name == prefix + "debug_info") {
975 debug_info = *e;
976 } else if (name == prefix + "debug_abbrev") {
977 debug_abbrev = *e;
978 } else if (name == prefix + "debug_str") {
979 debug_str = *e;
980 } else if (name == prefix + "debug_line") {
981 debug_line = *e;
982 } else if (name == prefix + "debug_ranges") {
983 debug_ranges = *e;
984 }
985 }
986 }
987
988 if (debug_info.empty() ||
989 debug_abbrev.empty() ||
990 debug_str.empty() ||
991 debug_line.empty() ||
992 debug_ranges.empty()) {
993 debug(2) << "Debugging sections not found\n";
994 working = false;
995 return;
996 }
997
998 {
999 // Parse the debug_info section to populate the functions and local variables
1000 llvm::DataExtractor extractor(debug_info, true, obj->getBytesInAddress());
1001 llvm::DataExtractor debug_abbrev_extractor(debug_abbrev, true, obj->getBytesInAddress());
1002 parse_debug_info(extractor, debug_abbrev_extractor, debug_str, debug_ranges);
1003 }
1004
1005 {
1006 llvm::DataExtractor e(debug_line, true, obj->getBytesInAddress());
1007 parse_debug_line(e);
1008 }
1009 }
1010
parse_debug_ranges(const llvm::DataExtractor & e)1011 void parse_debug_ranges(const llvm::DataExtractor &e) {
1012 }
1013
parse_debug_abbrev(const llvm::DataExtractor & e,llvm_offset_t off=0)1014 void parse_debug_abbrev(const llvm::DataExtractor &e, llvm_offset_t off = 0) {
1015 entry_formats.clear();
1016 while (1) {
1017 EntryFormat fmt;
1018 fmt.code = e.getULEB128(&off);
1019 if (!fmt.code) break;
1020 fmt.tag = e.getULEB128(&off);
1021 fmt.has_children = (e.getU8(&off) != 0);
1022 // Get the attributes
1023 /*
1024 printf("code = %lu\n"
1025 " tag = %lu\n"
1026 " has_children = %u\n", fmt.code, fmt.tag, fmt.has_children);
1027 */
1028 while (1) {
1029 uint64_t name = e.getULEB128(&off);
1030 uint64_t form = e.getULEB128(&off);
1031 if (!name && !form) break;
1032 //printf(" name = %lu, form = %lu\n", name, form);
1033
1034 FieldFormat f_fmt(name, form);
1035 fmt.fields.push_back(f_fmt);
1036 }
1037 entry_formats.push_back(fmt);
1038 }
1039 }
1040
parse_debug_info(const llvm::DataExtractor & e,const llvm::DataExtractor & debug_abbrev,llvm::StringRef debug_str,llvm::StringRef debug_ranges)1041 void parse_debug_info(const llvm::DataExtractor &e,
1042 const llvm::DataExtractor &debug_abbrev,
1043 llvm::StringRef debug_str,
1044 llvm::StringRef debug_ranges) {
1045 // Offset into the section
1046 llvm_offset_t off = 0;
1047
1048 llvm::StringRef debug_info = e.getData();
1049
1050 // A constant to use indicating that we don't know the stack
1051 // offset of a variable.
1052 const int no_location = 0x80000000;
1053
1054 while (1) {
1055 uint64_t start_of_unit_header = off;
1056
1057 // Parse compilation unit header
1058 bool dwarf_64;
1059 uint64_t unit_length = e.getU32(&off);
1060 if (unit_length == 0xffffffff) {
1061 dwarf_64 = true;
1062 unit_length = e.getU64(&off);
1063 } else {
1064 dwarf_64 = false;
1065 }
1066
1067 if (!unit_length) {
1068 // A zero-length compilation unit indicates the end of
1069 // the list.
1070 break;
1071 }
1072
1073 uint64_t start_of_unit = off;
1074
1075 uint16_t dwarf_version = e.getU16(&off);
1076
1077 uint64_t debug_abbrev_offset = 0;
1078 if (dwarf_64) {
1079 debug_abbrev_offset = e.getU64(&off);
1080 } else {
1081 debug_abbrev_offset = e.getU32(&off);
1082 }
1083 parse_debug_abbrev(debug_abbrev, debug_abbrev_offset);
1084
1085 uint8_t address_size = e.getU8(&off);
1086
1087 vector<pair<FunctionInfo, int>> func_stack;
1088 vector<pair<TypeInfo, int>> type_stack;
1089 vector<pair<std::string, int>> namespace_stack;
1090 vector<pair<vector<LiveRange>, int>> live_range_stack;
1091
1092 int stack_depth = 0;
1093
1094 uint64_t compile_unit_base_pc = 0;
1095
1096 // From the dwarf 4 spec
1097 const unsigned tag_array_type = 0x01;
1098 const unsigned tag_class_type = 0x02;
1099 const unsigned tag_lexical_block = 0x0b;
1100 const unsigned tag_member = 0x0d;
1101 const unsigned tag_pointer_type = 0x0f;
1102 const unsigned tag_reference_type = 0x10;
1103 const unsigned tag_compile_unit = 0x11;
1104 const unsigned tag_structure_type = 0x13;
1105 const unsigned tag_typedef = 0x16;
1106 const unsigned tag_inlined_subroutine = 0x1d;
1107 const unsigned tag_subrange_type = 0x21;
1108 const unsigned tag_base_type = 0x24;
1109 const unsigned tag_const_type = 0x26;
1110 const unsigned tag_function = 0x2e;
1111 const unsigned tag_variable = 0x34;
1112 const unsigned tag_namespace = 0x39;
1113
1114 const unsigned attr_location = 0x02;
1115 const unsigned attr_name = 0x03;
1116 const unsigned attr_byte_size = 0x0b;
1117 const unsigned attr_low_pc = 0x11;
1118 const unsigned attr_high_pc = 0x12;
1119 const unsigned attr_upper_bound = 0x2f;
1120 const unsigned attr_abstract_origin = 0x31;
1121 const unsigned attr_count = 0x37;
1122 const unsigned attr_data_member_location = 0x38;
1123 const unsigned attr_frame_base = 0x40;
1124 const unsigned attr_specification = 0x47;
1125 const unsigned attr_type = 0x49;
1126 const unsigned attr_ranges = 0x55;
1127
1128 while (off - start_of_unit < unit_length) {
1129 uint64_t location = off;
1130
1131 // Grab the next debugging information entry
1132 uint64_t abbrev_code = e.getULEB128(&off);
1133
1134 // A null entry indicates we're popping the stack.
1135 if (abbrev_code == 0) {
1136 if (func_stack.size() &&
1137 stack_depth == func_stack.back().second) {
1138 const FunctionInfo &f = func_stack.back().first;
1139 functions.push_back(f);
1140 func_stack.pop_back();
1141 }
1142 if (type_stack.size() &&
1143 stack_depth == type_stack.back().second) {
1144 const TypeInfo &c = type_stack.back().first;
1145 types.push_back(c);
1146 type_stack.pop_back();
1147 }
1148 if (namespace_stack.size() &&
1149 stack_depth == namespace_stack.back().second) {
1150 namespace_stack.pop_back();
1151 }
1152 if (live_range_stack.size() &&
1153 stack_depth == live_range_stack.back().second) {
1154 live_range_stack.pop_back();
1155 }
1156 stack_depth--;
1157 continue;
1158 }
1159
1160 internal_assert(abbrev_code <= entry_formats.size());
1161 const EntryFormat &fmt = entry_formats[abbrev_code - 1];
1162 internal_assert(fmt.code == abbrev_code);
1163
1164 LocalVariable var;
1165 GlobalVariable gvar;
1166 FunctionInfo func;
1167 TypeInfo type_info;
1168 vector<LiveRange> live_ranges;
1169 type_info.def_loc = location;
1170 func.def_loc = location;
1171 var.def_loc = location;
1172 gvar.def_loc = location;
1173 std::string namespace_name;
1174
1175 std::string containing_namespace;
1176 if (type_stack.size()) {
1177 containing_namespace = type_stack.back().first.name + "::";
1178 } else {
1179 for (size_t i = 0; i < namespace_stack.size(); i++) {
1180 containing_namespace += namespace_stack[i].first + "::";
1181 }
1182 }
1183
1184 var.stack_offset = no_location;
1185
1186 if (fmt.has_children) {
1187 stack_depth++;
1188 }
1189
1190 // Track local vars found for this function
1191
1192 // Grab the fields
1193 for (size_t i = 0; i < fmt.fields.size(); i++) {
1194 unsigned attr = fmt.fields[i].name;
1195
1196 // A field can either be a constant value:
1197 uint64_t val = 0;
1198 // Or a variable length payload:
1199 const uint8_t *payload = nullptr;
1200 // If payload is non-null, val indicates the
1201 // payload size. If val is zero the payload is a
1202 // null-terminated string.
1203
1204 switch (fmt.fields[i].form) {
1205 case 1: // addr (4 or 8 bytes)
1206 {
1207 if (address_size == 4) {
1208 val = e.getU32(&off);
1209 } else {
1210 val = e.getU64(&off);
1211 }
1212 break;
1213 }
1214 case 2: // There is no case 2
1215 {
1216 internal_error << "What's form 2?";
1217 break;
1218 }
1219 case 3: // block2 (2 byte length followed by payload)
1220 {
1221 val = e.getU16(&off);
1222 payload = (const uint8_t *)(debug_info.data() + off);
1223 off += val;
1224 break;
1225 }
1226 case 4: // block4 (4 byte length followed by payload)
1227 {
1228 val = e.getU32(&off);
1229 payload = (const uint8_t *)(debug_info.data() + off);
1230 off += val;
1231 break;
1232 }
1233 case 5: // data2 (2 bytes)
1234 {
1235 val = e.getU16(&off);
1236 break;
1237 }
1238 case 6: // data4 (4 bytes)
1239 {
1240 val = e.getU32(&off);
1241 break;
1242 }
1243 case 7: // data8 (8 bytes)
1244 {
1245 val = e.getU64(&off);
1246 break;
1247 }
1248 case 8: // string (null terminated sequence of bytes)
1249 {
1250 val = 0;
1251 payload = (const uint8_t *)(debug_info.data() + off);
1252 while (e.getU8(&off))
1253 ;
1254 break;
1255 }
1256 case 9: // block (uleb128 length followed by payload)
1257 {
1258 val = e.getULEB128(&off);
1259 payload = (const uint8_t *)(debug_info.data() + off);
1260 off += val;
1261 break;
1262 }
1263 case 10: // block1 (1 byte length followed by payload)
1264 {
1265 val = e.getU8(&off);
1266 payload = (const uint8_t *)(debug_info.data() + off);
1267 off += val;
1268 break;
1269 }
1270 case 11: // data1 (1 byte)
1271 {
1272 val = e.getU8(&off);
1273 break;
1274 }
1275 case 12: // flag (1 byte)
1276 {
1277 val = e.getU8(&off);
1278 break;
1279 }
1280 case 13: // sdata (sleb128 constant)
1281 {
1282 val = (uint64_t)e.getSLEB128(&off);
1283 break;
1284 }
1285 case 14: // strp (offset into debug_str section. 4 bytes in dwarf 32, 8 in dwarf 64)
1286 {
1287 uint64_t offset;
1288 if (dwarf_64) {
1289 offset = e.getU64(&off);
1290 } else {
1291 offset = e.getU32(&off);
1292 }
1293 val = 0;
1294 payload = (const uint8_t *)(debug_str.data() + offset);
1295 break;
1296 }
1297 case 15: // udata (uleb128 constant)
1298 {
1299 val = e.getULEB128(&off);
1300 break;
1301 }
1302 case 16: // ref_addr (offset from beginning of debug_info. 4 bytes in dwarf 32, 8 in dwarf 64)
1303 {
1304 if ((dwarf_version <= 2 && address_size == 8) ||
1305 (dwarf_version > 2 && dwarf_64)) {
1306 val = e.getU64(&off);
1307 } else {
1308 val = e.getU32(&off);
1309 }
1310 break;
1311 }
1312 case 17: // ref1 (1 byte offset from the first byte of the compilation unit header)
1313 {
1314 val = e.getU8(&off) + start_of_unit_header;
1315 break;
1316 }
1317 case 18: // ref2 (2 byte version of the same)
1318 {
1319 val = e.getU16(&off) + start_of_unit_header;
1320 break;
1321 }
1322 case 19: // ref4 (4 byte version of the same)
1323 {
1324 val = e.getU32(&off) + start_of_unit_header;
1325 break;
1326 }
1327 case 20: // ref8 (8 byte version of the same)
1328 {
1329 val = e.getU64(&off) + start_of_unit_header;
1330 break;
1331 }
1332 case 21: // ref_udata (uleb128 version of the same)
1333 {
1334 val = e.getULEB128(&off) + start_of_unit_header;
1335 break;
1336 }
1337 case 22: // indirect
1338 {
1339 internal_error << "Can't handle indirect form";
1340 break;
1341 }
1342 case 23: // sec_offset
1343 {
1344 if (dwarf_64) {
1345 val = e.getU64(&off);
1346 } else {
1347 val = e.getU32(&off);
1348 }
1349 break;
1350 }
1351 case 24: // exprloc
1352 {
1353 // Length
1354 val = e.getULEB128(&off);
1355 // Payload (contains a DWARF expression to evaluate (ugh))
1356 payload = (const uint8_t *)(debug_info.data() + off);
1357 off += val;
1358 break;
1359 }
1360 case 25: // flag_present
1361 {
1362 val = 0;
1363 // Just the existence of this field is information apparently? There's no data.
1364 break;
1365 }
1366 case 32: // ref_sig8
1367 {
1368 // 64-bit type signature for a reference in its own type unit
1369 val = e.getU64(&off);
1370 break;
1371 }
1372 default:
1373 internal_error << "Unknown form";
1374 break;
1375 }
1376
1377 if (fmt.tag == tag_function) {
1378 if (attr == attr_name) {
1379 func.name = containing_namespace + std::string((const char *)payload);
1380 } else if (attr == attr_low_pc) {
1381 func.pc_begin = val;
1382 } else if (attr == attr_high_pc) {
1383 if (fmt.fields[i].form == 0x1) {
1384 // Literal address
1385 func.pc_end = val;
1386 } else {
1387 // Size of the thing
1388 func.pc_end = func.pc_begin + val;
1389 }
1390 } else if (attr == attr_frame_base) {
1391 // GCC style
1392 if (val == 1 && payload && payload[0] == 0x9c) {
1393 func.frame_base = FunctionInfo::GCC;
1394 } else if (val == 1 && payload && payload[0] == 0x56 && sizeof(void *) == 8) {
1395 func.frame_base = FunctionInfo::ClangFP;
1396 } else if (val == 1 && payload && payload[0] == 0x55 && sizeof(void *) == 4) {
1397 func.frame_base = FunctionInfo::ClangFP;
1398 } else if (val == 1 && payload && payload[0] == 0x57 && sizeof(void *) == 8) {
1399 func.frame_base = FunctionInfo::ClangNoFP;
1400 } else if (val == 1 && payload && payload[0] == 0x54 && sizeof(void *) == 4) {
1401 func.frame_base = FunctionInfo::ClangNoFP;
1402 } else {
1403 func.frame_base = FunctionInfo::Unknown;
1404 }
1405 } else if (attr == attr_specification) {
1406 func.spec_loc = val;
1407 }
1408 } else if (fmt.tag == tag_base_type) {
1409 if (attr == attr_name) {
1410 type_info.name = containing_namespace + std::string((const char *)payload);
1411 type_info.type = TypeInfo::Primitive;
1412 } else if (attr == attr_byte_size) {
1413 type_info.size = val;
1414 }
1415 } else if (fmt.tag == tag_class_type) {
1416 if (attr == attr_name) {
1417 type_info.name = containing_namespace + std::string((const char *)payload);
1418 type_info.type = TypeInfo::Class;
1419 } else if (attr == attr_byte_size) {
1420 type_info.size = val;
1421 }
1422 } else if (fmt.tag == tag_structure_type) {
1423 if (attr == attr_name) {
1424 type_info.name = containing_namespace + std::string((const char *)payload);
1425 type_info.type = TypeInfo::Struct;
1426 } else if (attr == attr_byte_size) {
1427 type_info.size = val;
1428 }
1429 } else if (fmt.tag == tag_typedef) {
1430 if (attr == attr_name) {
1431 type_info.name = containing_namespace + std::string((const char *)payload);
1432 type_info.type = TypeInfo::Typedef;
1433 } else if (attr == attr_type) {
1434 // Approximate a typedef as a single-member class
1435 LocalVariable m;
1436 m.type_def_loc = val;
1437 m.stack_offset = 0;
1438 type_info.members.push_back(m);
1439 }
1440 } else if (fmt.tag == tag_pointer_type) {
1441 if (attr == attr_type) {
1442 // Approximate a pointer type as a single-member class
1443 LocalVariable m;
1444 m.type_def_loc = val;
1445 m.stack_offset = 0;
1446 type_info.members.push_back(m);
1447 type_info.type = TypeInfo::Pointer;
1448 // Assume the size is the address size
1449 type_info.size = address_size;
1450 } else if (attr == attr_byte_size) {
1451 // Should really be 4 or 8
1452 type_info.size = val;
1453 }
1454 } else if (fmt.tag == tag_reference_type) {
1455 if (attr == attr_type) {
1456 LocalVariable m;
1457 m.type_def_loc = val;
1458 m.stack_offset = 0;
1459 type_info.members.push_back(m);
1460 type_info.type = TypeInfo::Reference;
1461 } else if (attr == attr_byte_size) {
1462 // Should really be 4 or 8
1463 type_info.size = val;
1464 }
1465 } else if (fmt.tag == tag_const_type) {
1466 if (attr == attr_type) {
1467 LocalVariable m;
1468 m.type_def_loc = val;
1469 m.stack_offset = 0;
1470 type_info.members.push_back(m);
1471 type_info.type = TypeInfo::Const;
1472 } else if (attr == attr_byte_size) {
1473 // Should really be 4 or 8
1474 type_info.size = val;
1475 }
1476 } else if (fmt.tag == tag_array_type) {
1477 if (attr == attr_type) {
1478 LocalVariable m;
1479 m.type_def_loc = val;
1480 m.stack_offset = 0;
1481 type_info.members.push_back(m);
1482 type_info.type = TypeInfo::Array;
1483 } else if (attr == attr_byte_size) {
1484 // According to the dwarf spec, this
1485 // should be the number of bytes the array
1486 // occupies, but compilers seem to emit
1487 // the number of array entries instead.
1488 type_info.size = val;
1489 }
1490 } else if (fmt.tag == tag_variable) {
1491 if (attr == attr_name) {
1492 if (func_stack.empty()) {
1493 // Global var
1494 gvar.name = containing_namespace + std::string((const char *)payload);
1495 } else {
1496 // Either a local var, or a static var inside a function
1497 gvar.name = var.name = std::string((const char *)payload);
1498 }
1499 } else if (attr == attr_location) {
1500 // We only understand locations which are
1501 // offsets from the function's frame
1502 if (payload && payload[0] == 0x91) {
1503 // It's a local
1504 // payload + 1 is a sleb128
1505 var.stack_offset = (int)(get_sleb128(payload + 1));
1506 } else if (payload && payload[0] == 0x03 && val == (sizeof(void *) + 1)) {
1507 // It's a global
1508 // payload + 1 is an address
1509 const void *addr = load_misaligned((const void *const *)(payload + 1));
1510 gvar.addr = (uint64_t)(addr);
1511 } else {
1512 // Some other format that we don't understand
1513 var.stack_offset = no_location;
1514 }
1515 } else if (attr == attr_type) {
1516 var.type_def_loc = val;
1517 gvar.type_def_loc = val;
1518 } else if (attr == attr_abstract_origin) {
1519 // This is a stack variable imported from a function that was inlined.
1520 var.origin_loc = val;
1521 } else if (attr == attr_specification) {
1522 // This is an instance of a global var with a prototype elsewhere
1523 gvar.spec_loc = val;
1524 }
1525 } else if (fmt.tag == tag_member) {
1526 if (attr == attr_name) {
1527 var.name = std::string((const char *)payload);
1528 if (type_stack.size()) {
1529 gvar.name = type_stack.back().first.name + "::" + var.name;
1530 } else {
1531 gvar.name = var.name;
1532 }
1533 } else if (attr == attr_data_member_location) {
1534 if (!payload) {
1535 var.stack_offset = val;
1536 } else if (payload[0] == 0x23) {
1537 var.stack_offset = (int)(get_uleb128(payload + 1));
1538 }
1539 } else if (attr == attr_type) {
1540 var.type_def_loc = val;
1541 gvar.type_def_loc = val;
1542 }
1543 } else if (fmt.tag == tag_namespace) {
1544 if (attr == attr_name) {
1545 namespace_name = std::string((const char *)payload);
1546 }
1547 } else if (fmt.tag == tag_subrange_type) {
1548 // Could be telling us the size of an array type
1549 if (attr == attr_upper_bound &&
1550 type_stack.size() &&
1551 type_stack.back().first.type == TypeInfo::Array) {
1552 type_stack.back().first.size = val + 1;
1553 } else if (attr == attr_count &&
1554 type_stack.size() &&
1555 type_stack.back().first.type == TypeInfo::Array) {
1556 type_stack.back().first.size = val;
1557 }
1558 } else if (fmt.tag == tag_inlined_subroutine ||
1559 fmt.tag == tag_lexical_block) {
1560 if (attr == attr_low_pc) {
1561 LiveRange r = {val, val};
1562 live_ranges.push_back(r);
1563 } else if (attr == attr_high_pc && live_ranges.size()) {
1564 if (fmt.fields[i].form == 0x1) {
1565 // Literal address
1566 live_ranges.back().pc_end = val;
1567 } else {
1568 // Size
1569 live_ranges.back().pc_end = live_ranges.back().pc_begin + val;
1570 }
1571 } else if (attr == attr_ranges) {
1572 if (val < debug_ranges.size()) {
1573 // It's an array of addresses
1574 const void *const *ptr = (const void *const *)(debug_ranges.data() + val);
1575 const void *const *end = (const void *const *)(debug_ranges.data() + debug_ranges.size());
1576 // Note: might not be properly aligned; use memcpy to avoid
1577 // sanitizer warnings
1578 while (load_misaligned(ptr) && ptr < end - 1) {
1579 LiveRange r = {(uint64_t)load_misaligned(ptr), (uint64_t)load_misaligned(ptr + 1)};
1580 r.pc_begin += compile_unit_base_pc;
1581 r.pc_end += compile_unit_base_pc;
1582 live_ranges.push_back(r);
1583 ptr += 2;
1584 }
1585 }
1586 }
1587 } else if (fmt.tag == tag_compile_unit) {
1588 if (attr == attr_low_pc) {
1589 compile_unit_base_pc = val;
1590 }
1591 }
1592 }
1593
1594 if (fmt.tag == tag_variable) {
1595 if (func_stack.size() && !gvar.addr) {
1596 if (live_range_stack.size()) {
1597 var.live_ranges = live_range_stack.back().first;
1598 }
1599 func_stack.back().first.variables.push_back(var);
1600 } else {
1601 global_variables.push_back(gvar);
1602 }
1603 } else if (fmt.tag == tag_member &&
1604 type_stack.size()) {
1605 if (var.stack_offset == no_location) {
1606 // A member with no stack offset location is probably the prototype for a static member
1607 global_variables.push_back(gvar);
1608 } else {
1609 type_stack.back().first.members.push_back(var);
1610 }
1611
1612 } else if (fmt.tag == tag_function) {
1613 if (fmt.has_children) {
1614 func_stack.emplace_back(func, stack_depth);
1615 } else {
1616 functions.push_back(func);
1617 }
1618 } else if (fmt.tag == tag_class_type ||
1619 fmt.tag == tag_structure_type ||
1620 fmt.tag == tag_array_type ||
1621 fmt.tag == tag_base_type) {
1622 if (fmt.has_children) {
1623 type_stack.emplace_back(type_info, stack_depth);
1624 } else {
1625 types.push_back(type_info);
1626 }
1627 } else if ((fmt.tag == tag_typedef ||
1628 fmt.tag == tag_pointer_type ||
1629 fmt.tag == tag_reference_type ||
1630 fmt.tag == tag_const_type) &&
1631 type_info.members.size() == 1) {
1632 types.push_back(type_info);
1633 } else if (fmt.tag == tag_namespace && fmt.has_children) {
1634 if (namespace_name.empty()) {
1635 namespace_name = "_";
1636 }
1637 namespace_stack.emplace_back(namespace_name, stack_depth);
1638 } else if ((fmt.tag == tag_inlined_subroutine ||
1639 fmt.tag == tag_lexical_block) &&
1640 live_ranges.size() && fmt.has_children) {
1641 live_range_stack.emplace_back(live_ranges, stack_depth);
1642 }
1643 }
1644 }
1645
1646 // Connect function definitions to their declarations
1647 {
1648 std::map<uint64_t, FunctionInfo *> func_map;
1649 for (size_t i = 0; i < functions.size(); i++) {
1650 func_map[functions[i].def_loc] = &functions[i];
1651 }
1652
1653 for (size_t i = 0; i < functions.size(); i++) {
1654 if (functions[i].spec_loc) {
1655 FunctionInfo *spec = func_map[functions[i].spec_loc];
1656 if (spec) {
1657 functions[i].name = spec->name;
1658 }
1659 }
1660 }
1661 }
1662
1663 // Connect inlined variable instances to their origins
1664 {
1665 std::map<uint64_t, LocalVariable *> var_map;
1666 for (size_t i = 0; i < functions.size(); i++) {
1667 for (size_t j = 0; j < functions[i].variables.size(); j++) {
1668 var_map[functions[i].variables[j].def_loc] = &(functions[i].variables[j]);
1669 }
1670 }
1671
1672 for (size_t i = 0; i < functions.size(); i++) {
1673 for (size_t j = 0; j < functions[i].variables.size(); j++) {
1674 LocalVariable &v = functions[i].variables[j];
1675 uint64_t loc = v.origin_loc;
1676 if (loc) {
1677 LocalVariable *origin = var_map[loc];
1678 if (origin) {
1679 v.name = origin->name;
1680 v.type = origin->type;
1681 v.type_def_loc = origin->type_def_loc;
1682 } else {
1683 debug(5) << "Variable with bad abstract origin: " << loc << "\n";
1684 }
1685 }
1686 }
1687 }
1688 }
1689
1690 // Connect global variable instances to their prototypes
1691 {
1692 std::map<uint64_t, GlobalVariable *> var_map;
1693 for (size_t i = 0; i < global_variables.size(); i++) {
1694 GlobalVariable &var = global_variables[i];
1695 debug(5) << "var " << var.name << " is at " << var.def_loc << "\n";
1696 if (var.spec_loc || var.name.empty()) {
1697 // Not a prototype
1698 continue;
1699 }
1700 var_map[var.def_loc] = &var;
1701 }
1702
1703 for (size_t i = 0; i < global_variables.size(); i++) {
1704 GlobalVariable &var = global_variables[i];
1705 if (var.name.empty() && var.spec_loc) {
1706 GlobalVariable *spec = var_map[var.spec_loc];
1707 if (spec) {
1708 var.name = spec->name;
1709 var.type = spec->type;
1710 var.type_def_loc = spec->type_def_loc;
1711 } else {
1712 debug(5) << "Global variable with bad spec loc: " << var.spec_loc << "\n";
1713 }
1714 }
1715 }
1716 }
1717
1718 // Hook up the type pointers
1719 {
1720 std::map<uint64_t, TypeInfo *> type_map;
1721 for (size_t i = 0; i < types.size(); i++) {
1722 type_map[types[i].def_loc] = &types[i];
1723 }
1724
1725 for (size_t i = 0; i < functions.size(); i++) {
1726 for (size_t j = 0; j < functions[i].variables.size(); j++) {
1727 functions[i].variables[j].type =
1728 type_map[functions[i].variables[j].type_def_loc];
1729 }
1730 }
1731
1732 for (size_t i = 0; i < global_variables.size(); i++) {
1733 global_variables[i].type =
1734 type_map[global_variables[i].type_def_loc];
1735 }
1736
1737 for (size_t i = 0; i < types.size(); i++) {
1738 for (size_t j = 0; j < types[i].members.size(); j++) {
1739 types[i].members[j].type =
1740 type_map[types[i].members[j].type_def_loc];
1741 }
1742 }
1743 }
1744
1745 for (size_t i = 0; i < types.size(); i++) {
1746 // Set the names of the pointer types
1747 vector<std::string> suffix;
1748 TypeInfo *t = &types[i];
1749 while (t) {
1750 if (t->type == TypeInfo::Pointer) {
1751 suffix.emplace_back("*");
1752 internal_assert(t->members.size() == 1);
1753 t = t->members[0].type;
1754 } else if (t->type == TypeInfo::Reference) {
1755 suffix.emplace_back("&");
1756 internal_assert(t->members.size() == 1);
1757 t = t->members[0].type;
1758 } else if (t->type == TypeInfo::Const) {
1759 suffix.emplace_back("const");
1760 internal_assert(t->members.size() == 1);
1761 t = t->members[0].type;
1762 } else if (t->type == TypeInfo::Array) {
1763 // Do we know the size?
1764 if (t->size != 0) {
1765 std::ostringstream oss;
1766 oss << "[" << t->size << "]";
1767 suffix.push_back(oss.str());
1768 } else {
1769 suffix.emplace_back("[]");
1770 }
1771 internal_assert(t->members.size() == 1);
1772 t = t->members[0].type;
1773 } else {
1774 break;
1775 }
1776 }
1777
1778 if (t && suffix.size()) {
1779 types[i].name = t->name;
1780 while (suffix.size()) {
1781 types[i].name += " " + suffix.back();
1782 suffix.pop_back();
1783 }
1784 }
1785 }
1786
1787 // Fix up the sizes of typedefs where we know the underlying type
1788 for (size_t i = 0; i < types.size(); i++) {
1789 TypeInfo *t = &types[i];
1790 if (types[i].type == TypeInfo::Typedef &&
1791 !t->members.empty() &&
1792 t->members[0].type) {
1793 t->size = t->members[0].type->size;
1794 }
1795 }
1796
1797 // Unpack class members into the local variables list.
1798 for (size_t i = 0; i < functions.size(); i++) {
1799 vector<LocalVariable> new_vars = functions[i].variables;
1800 for (size_t j = 0; j < new_vars.size(); j++) {
1801 // If new_vars[j] is a class type, unpack its members
1802 // immediately after this point.
1803 const LocalVariable &v = new_vars[j];
1804 if (v.type &&
1805 (v.type->type == TypeInfo::Struct ||
1806 v.type->type == TypeInfo::Class ||
1807 v.type->type == TypeInfo::Typedef)) {
1808 size_t members = v.type->members.size();
1809 new_vars.insert(new_vars.begin() + j + 1,
1810 v.type->members.begin(),
1811 v.type->members.end());
1812
1813 // Typedefs retain the same name and stack offset
1814 if (new_vars[j].type->type == TypeInfo::Typedef) {
1815 new_vars[j + 1].name = new_vars[j].name;
1816 new_vars[j + 1].stack_offset = new_vars[j].stack_offset;
1817 } else {
1818 // Correct the stack offsets and names
1819 for (size_t k = 0; k < members; k++) {
1820 new_vars[j + k + 1].stack_offset += new_vars[j].stack_offset;
1821 if (new_vars[j + k + 1].name.size() &&
1822 new_vars[j].name.size()) {
1823 new_vars[j + k + 1].name = new_vars[j].name + "." + new_vars[j + k + 1].name;
1824 }
1825 }
1826 }
1827 }
1828 }
1829 functions[i].variables.swap(new_vars);
1830
1831 if (functions[i].variables.size()) {
1832 debug(5) << "Function " << functions[i].name << ":\n";
1833 for (size_t j = 0; j < functions[i].variables.size(); j++) {
1834 if (functions[i].variables[j].type) {
1835 debug(5) << " " << functions[i].variables[j].type->name << " " << functions[i].variables[j].name << "\n";
1836 }
1837 }
1838 }
1839 }
1840
1841 // Unpack class members of global variables
1842 for (size_t i = 0; i < global_variables.size(); i++) {
1843 GlobalVariable v = global_variables[i];
1844 if (v.type && v.addr &&
1845 (v.type->type == TypeInfo::Struct ||
1846 v.type->type == TypeInfo::Class ||
1847 v.type->type == TypeInfo::Typedef)) {
1848 debug(5) << "Unpacking members of " << v.name << " at " << std::hex << v.addr << "\n";
1849 vector<LocalVariable> &members = v.type->members;
1850 for (size_t j = 0; j < members.size(); j++) {
1851 GlobalVariable mem;
1852 if (!v.name.empty() && !members[j].name.empty()) {
1853 mem.name = v.name + "." + members[j].name;
1854 } else {
1855 // Might be a member of an anonymous struct?
1856 mem.name = members[j].name;
1857 }
1858 mem.type = members[j].type;
1859 mem.type_def_loc = members[j].type_def_loc;
1860 mem.addr = v.addr + members[j].stack_offset;
1861 debug(5) << " Member " << mem.name << " goes at " << mem.addr << "\n";
1862 global_variables.push_back(mem);
1863 }
1864 debug(5) << std::dec;
1865 }
1866 }
1867
1868 // Drop functions for which we don't know the program counter,
1869 // and variables for which we don't know the stack offset,
1870 // name, or type.
1871 {
1872 vector<FunctionInfo> trimmed;
1873 for (size_t i = 0; i < functions.size(); i++) {
1874 FunctionInfo &f = functions[i];
1875 if (!f.pc_begin ||
1876 !f.pc_end ||
1877 f.name.empty()) {
1878 //debug(5) << "Dropping " << f.name << "\n";
1879 continue;
1880 }
1881
1882 vector<LocalVariable> vars;
1883 for (size_t j = 0; j < f.variables.size(); j++) {
1884 LocalVariable &v = f.variables[j];
1885 if (!v.name.empty() && v.type && v.stack_offset != no_location) {
1886 vars.push_back(v);
1887 } else {
1888 //debug(5) << "Dropping " << v.name << "\n";
1889 }
1890 }
1891 f.variables.clear();
1892 trimmed.push_back(f);
1893 trimmed.back().variables = vars;
1894 }
1895 std::swap(functions, trimmed);
1896 }
1897
1898 // Drop globals for which we don't know the address or name
1899 {
1900 vector<GlobalVariable> trimmed;
1901 for (size_t i = 0; i < global_variables.size(); i++) {
1902 GlobalVariable &v = global_variables[i];
1903 if (!v.name.empty() && v.addr) {
1904 trimmed.push_back(v);
1905 }
1906 }
1907
1908 std::swap(global_variables, trimmed);
1909 }
1910
1911 // Sort the functions list by program counter
1912 std::sort(functions.begin(), functions.end());
1913
1914 // Sort the global variables by address
1915 std::sort(global_variables.begin(), global_variables.end());
1916 }
1917
parse_debug_line(const llvm::DataExtractor & e)1918 void parse_debug_line(const llvm::DataExtractor &e) {
1919 llvm_offset_t off = 0;
1920
1921 // For every compilation unit
1922 while (1) {
1923 // Parse the header
1924 uint32_t unit_length = e.getU32(&off);
1925
1926 if (unit_length == 0) {
1927 // No more units
1928 break;
1929 }
1930
1931 llvm_offset_t unit_end = off + unit_length;
1932
1933 debug(5) << "Parsing compilation unit from " << off << " to " << unit_end << "\n";
1934
1935 uint16_t version = e.getU16(&off);
1936 internal_assert(version >= 2);
1937
1938 uint32_t header_length = e.getU32(&off);
1939 llvm_offset_t end_header_off = off + header_length;
1940 uint8_t min_instruction_length = e.getU8(&off);
1941 uint8_t max_ops_per_instruction = 1;
1942 if (version >= 4) {
1943 // This is for VLIW architectures
1944 max_ops_per_instruction = e.getU8(&off);
1945 }
1946 uint8_t default_is_stmt = e.getU8(&off);
1947 int8_t line_base = (int8_t)e.getU8(&off);
1948 uint8_t line_range = e.getU8(&off);
1949 uint8_t opcode_base = e.getU8(&off);
1950
1951 vector<uint8_t> standard_opcode_length(opcode_base);
1952 for (int i = 1; i < opcode_base; i++) {
1953 // Note we don't use entry 0
1954 standard_opcode_length[i] = e.getU8(&off);
1955 }
1956
1957 vector<std::string> include_dirs;
1958 // The current directory is implicitly the first dir.
1959 include_dirs.emplace_back(".");
1960 while (off < end_header_off) {
1961 const char *s = e.getCStr(&off);
1962 if (s && s[0]) {
1963 include_dirs.emplace_back(s);
1964 } else {
1965 break;
1966 }
1967 }
1968
1969 // The first source file index for this compilation unit.
1970 int source_files_base = source_files.size();
1971
1972 while (off < end_header_off) {
1973 const char *name = e.getCStr(&off);
1974 if (name && name[0]) {
1975 uint64_t dir = e.getULEB128(&off);
1976 uint64_t mod_time = e.getULEB128(&off);
1977 uint64_t length = e.getULEB128(&off);
1978 (void)mod_time;
1979 (void)length;
1980 internal_assert(dir <= include_dirs.size());
1981 source_files.push_back(include_dirs[dir] + "/" + name);
1982 } else {
1983 break;
1984 }
1985 }
1986
1987 internal_assert(off == end_header_off) << "Failed parsing section .debug_line";
1988
1989 // Now parse the table. It uses a state machine with the following fields:
1990 struct {
1991 // Current program counter
1992 uint64_t address;
1993 // Which op within that instruction (for VLIW archs)
1994 uint32_t op_index;
1995 // File and line index;
1996 uint32_t file, line, column;
1997 bool is_stmt, basic_block, end_sequence, prologue_end, epilogue_begin;
1998 // The ISA of the architecture (e.g. x86-64 vs armv7 vs thumb)
1999 uint32_t isa;
2000 // The id of the block to which this line belongs
2001 uint32_t discriminator;
2002
2003 void append_row(vector<LineInfo> &lines) {
2004 LineInfo l = {address, line, file};
2005 lines.push_back(l);
2006 }
2007 } state, initial_state;
2008
2009 // Initialize the state table.
2010 initial_state.address = 0;
2011 initial_state.op_index = 0;
2012 initial_state.file = 0;
2013 initial_state.line = 1;
2014 initial_state.column = 0;
2015 initial_state.is_stmt = default_is_stmt;
2016 initial_state.basic_block = false;
2017 initial_state.end_sequence = false;
2018 initial_state.prologue_end = false;
2019 initial_state.epilogue_begin = false;
2020 initial_state.isa = 0;
2021 initial_state.discriminator = 0;
2022 state = initial_state;
2023
2024 // For every sequence.
2025 while (off < unit_end) {
2026 uint8_t opcode = e.getU8(&off);
2027
2028 if (opcode == 0) {
2029 // Extended opcodes
2030 llvm_offset_t ext_offset = off;
2031 uint64_t len = e.getULEB128(&off);
2032 llvm_offset_t arg_size = len - (off - ext_offset);
2033 uint8_t sub_opcode = e.getU8(&off);
2034 switch (sub_opcode) {
2035 case 1: // end_sequence
2036 {
2037 state.end_sequence = true;
2038 state.append_row(source_lines);
2039 state = initial_state;
2040 break;
2041 }
2042 case 2: // set_address
2043 {
2044 state.address = e.getAddress(&off);
2045 break;
2046 }
2047 case 3: // define_file
2048 {
2049 const char *name = e.getCStr(&off);
2050 uint64_t dir_index = e.getULEB128(&off);
2051 uint64_t mod_time = e.getULEB128(&off);
2052 uint64_t length = e.getULEB128(&off);
2053 (void)mod_time;
2054 (void)length;
2055 internal_assert(dir_index < include_dirs.size());
2056 source_files.push_back(include_dirs[dir_index] + "/" + name);
2057 break;
2058 }
2059 case 4: // set_discriminator
2060 {
2061 state.discriminator = e.getULEB128(&off);
2062 break;
2063 }
2064 default: // Some unknown thing. Skip it.
2065 off += arg_size;
2066 }
2067 } else if (opcode < opcode_base) {
2068 // A standard opcode
2069 switch (opcode) {
2070 case 1: // copy
2071 {
2072 state.append_row(source_lines);
2073 state.basic_block = false;
2074 state.prologue_end = false;
2075 state.epilogue_begin = false;
2076 state.discriminator = 0;
2077 break;
2078 }
2079 case 2: // advance_pc
2080 {
2081 uint64_t advance = e.getULEB128(&off);
2082 state.address += min_instruction_length * ((state.op_index + advance) / max_ops_per_instruction);
2083 state.op_index = (state.op_index + advance) % max_ops_per_instruction;
2084 break;
2085 }
2086 case 3: // advance_line
2087 {
2088 state.line += e.getSLEB128(&off);
2089 break;
2090 }
2091 case 4: // set_file
2092 {
2093 state.file = e.getULEB128(&off) - 1 + source_files_base;
2094 break;
2095 }
2096 case 5: // set_column
2097 {
2098 state.column = e.getULEB128(&off);
2099 break;
2100 }
2101 case 6: // negate_stmt
2102 {
2103 state.is_stmt = !state.is_stmt;
2104 break;
2105 }
2106 case 7: // set_basic_block
2107 {
2108 state.basic_block = true;
2109 break;
2110 }
2111 case 8: // const_add_pc
2112 {
2113 // Same as special opcode 255 (but doesn't emit a row or reset state)
2114 uint8_t adjust_opcode = 255 - opcode_base;
2115 uint64_t advance = adjust_opcode / line_range;
2116 state.address += min_instruction_length * ((state.op_index + advance) / max_ops_per_instruction);
2117 state.op_index = (state.op_index + advance) % max_ops_per_instruction;
2118 break;
2119 }
2120 case 9: // fixed_advance_pc
2121 {
2122 uint16_t advance = e.getU16(&off);
2123 state.address += advance;
2124 break;
2125 }
2126 case 10: // set_prologue_end
2127 {
2128 state.prologue_end = true;
2129 break;
2130 }
2131 case 11: // set_epilogue_begin
2132 {
2133 state.epilogue_begin = true;
2134 break;
2135 }
2136 case 12: // set_isa
2137 {
2138 state.isa = e.getULEB128(&off);
2139 break;
2140 }
2141 default: {
2142 // Unknown standard opcode. Skip over the args.
2143 uint8_t args = standard_opcode_length[opcode];
2144 for (int i = 0; i < args; i++) {
2145 e.getULEB128(&off);
2146 }
2147 }
2148 }
2149 } else {
2150 // Special opcode
2151 uint8_t adjust_opcode = opcode - opcode_base;
2152 uint64_t advance_op = adjust_opcode / line_range;
2153 uint64_t advance_line = line_base + adjust_opcode % line_range;
2154 state.address += min_instruction_length * ((state.op_index + advance_op) / max_ops_per_instruction);
2155 state.op_index = (state.op_index + advance_op) % max_ops_per_instruction;
2156 state.line += advance_line;
2157 state.append_row(source_lines);
2158 state.basic_block = false;
2159 state.prologue_end = false;
2160 state.epilogue_begin = false;
2161 state.discriminator = 0;
2162 }
2163 }
2164 }
2165
2166 // Sort the sequences and functions by low PC to make searching into it faster.
2167 std::sort(source_lines.begin(), source_lines.end());
2168 }
2169
find_containing_function(void * addr)2170 FunctionInfo *find_containing_function(void *addr) {
2171 uint64_t address = (uint64_t)addr;
2172 debug(5) << "Searching for function containing address " << addr << "\n";
2173 size_t hi = functions.size();
2174 size_t lo = 0;
2175 while (hi > lo) {
2176 size_t mid = (hi + lo) / 2;
2177 uint64_t pc_mid_begin = functions[mid].pc_begin;
2178 uint64_t pc_mid_end = functions[mid].pc_end;
2179 if (address < pc_mid_begin) {
2180 hi = mid;
2181 } else if (address > pc_mid_end) {
2182 lo = mid + 1;
2183 } else {
2184 debug(5) << "At function " << functions[mid].name
2185 << " spanning: " << (void *)pc_mid_begin
2186 << ", " << (void *)pc_mid_end << "\n";
2187 return &functions[mid];
2188 }
2189 }
2190
2191 return nullptr;
2192 }
2193
get_sleb128(const uint8_t * ptr)2194 int64_t get_sleb128(const uint8_t *ptr) {
2195 int64_t result = 0;
2196 unsigned shift = 0;
2197 uint8_t byte = 0;
2198
2199 while (1) {
2200 internal_assert(shift < 57);
2201 byte = *ptr++;
2202 result |= (uint64_t)(byte & 0x7f) << shift;
2203 shift += 7;
2204 if ((byte & 0x80) == 0) {
2205 break;
2206 }
2207 }
2208
2209 // Second-highest bit of the final byte gives the sign.
2210 if (shift < 64 && (byte & 0x40)) {
2211 // Fill the rest of the bytes with ones.
2212 result |= -(1ULL << shift);
2213 }
2214
2215 return result;
2216 }
2217
get_uleb128(const uint8_t * ptr)2218 int64_t get_uleb128(const uint8_t *ptr) {
2219 uint64_t result = 0;
2220 unsigned shift = 0;
2221 uint8_t byte = 0;
2222
2223 while (1) {
2224 internal_assert(shift < 57);
2225 byte = *ptr++;
2226 result |= (uint64_t)(byte & 0x7f) << shift;
2227 shift += 7;
2228 if ((byte & 0x80) == 0) {
2229 return result;
2230 }
2231 }
2232 }
2233 };
2234
2235 namespace {
2236 DebugSections *debug_sections = nullptr;
2237 }
2238
dump_stack_frame()2239 bool dump_stack_frame() {
2240 if (!debug_sections || !debug_sections->working) {
2241 return false;
2242 }
2243 void *ptr = __builtin_return_address(0);
2244 return debug_sections->dump_stack_frame(ptr);
2245 }
2246
get_variable_name(const void * var,const std::string & expected_type)2247 std::string get_variable_name(const void *var, const std::string &expected_type) {
2248 if (!debug_sections) return "";
2249 if (!debug_sections->working) return "";
2250 std::string name = debug_sections->get_stack_variable_name(var, expected_type);
2251 if (name.empty()) {
2252 // Maybe it's a member of a heap object.
2253 name = debug_sections->get_heap_member_name(var, expected_type);
2254 }
2255 if (name.empty()) {
2256 // Maybe it's a global
2257 name = debug_sections->get_global_variable_name(var, expected_type);
2258 }
2259
2260 return name;
2261 }
2262
get_source_location()2263 std::string get_source_location() {
2264 if (!debug_sections) return "";
2265 if (!debug_sections->working) return "";
2266 return debug_sections->get_source_location();
2267 }
2268
register_heap_object(const void * obj,size_t size,const void * helper)2269 void register_heap_object(const void *obj, size_t size, const void *helper) {
2270 if (!debug_sections) return;
2271 if (!debug_sections->working) return;
2272 if (!helper) return;
2273 debug_sections->register_heap_object(obj, size, helper);
2274 }
2275
deregister_heap_object(const void * obj,size_t size)2276 void deregister_heap_object(const void *obj, size_t size) {
2277 if (!debug_sections) return;
2278 if (!debug_sections->working) return;
2279 debug_sections->deregister_heap_object(obj, size);
2280 }
2281
saves_frame_pointer(void * fn)2282 bool saves_frame_pointer(void *fn) {
2283 // On x86-64, if we save the frame pointer, the first two instructions should be pushing the stack pointer and the frame pointer:
2284 const uint8_t *ptr = (const uint8_t *)(fn);
2285 // Skip over a valid-branch-target marker (endbr64), if there is
2286 // one. These sometimes start functions to help detect control flow
2287 // violations.
2288 if (ptr[0] == 0xf3 && ptr[1] == 0x0f && ptr[2] == 0x1e && ptr[3] == 0xfa) {
2289 ptr += 4;
2290 }
2291 return ptr[0] == 0x55; // push %rbp
2292 }
2293
test_compilation_unit(bool (* test)(bool (*)(const void *,const std::string &)),bool (* test_a)(const void *,const std::string &),void (* calib)())2294 void test_compilation_unit(bool (*test)(bool (*)(const void *, const std::string &)),
2295 bool (*test_a)(const void *, const std::string &),
2296 void (*calib)()) {
2297 #ifdef __ARM__
2298 return;
2299 #else
2300
2301 // Skip entirely on arm or 32-bit
2302 if (sizeof(void *) == 4) {
2303 return;
2304 }
2305
2306 debug(5) << "Testing compilation unit with offset_marker at " << reinterpret_bits<void *>(calib) << "\n";
2307
2308 if (!debug_sections) {
2309 char path[2048];
2310 get_program_name(path, sizeof(path));
2311 debug_sections = new DebugSections(path);
2312 }
2313
2314 if (!saves_frame_pointer(reinterpret_bits<void *>(&test_compilation_unit)) ||
2315 !saves_frame_pointer(reinterpret_bits<void *>(test))) {
2316 // Make sure libHalide and the test compilation unit both save the frame pointer
2317 debug_sections->working = false;
2318 debug(5) << "Failed because frame pointer not saved\n";
2319 } else if (debug_sections->working) {
2320 debug_sections->calibrate_pc_offset(calib);
2321 if (!debug_sections->working) {
2322 debug(5) << "Failed because offset calibration failed\n";
2323 return;
2324 }
2325
2326 debug_sections->working = (*test)(test_a);
2327 if (!debug_sections->working) {
2328 debug(5) << "Failed because test routine failed\n";
2329 return;
2330 }
2331
2332 debug(5) << "Test passed\n";
2333 }
2334
2335 #endif
2336 }
2337
2338 } // namespace Introspection
2339 } // namespace Internal
2340 } // namespace Halide
2341
2342 #else // WITH_INTROSPECTION
2343
2344 namespace Halide {
2345 namespace Internal {
2346 namespace Introspection {
2347
get_variable_name(const void * var,const std::string & expected_type)2348 std::string get_variable_name(const void *var, const std::string &expected_type) {
2349 return "";
2350 }
2351
get_source_location()2352 std::string get_source_location() {
2353 return "";
2354 }
2355
register_heap_object(const void * obj,size_t size,const void * helper)2356 void register_heap_object(const void *obj, size_t size, const void *helper) {
2357 }
2358
deregister_heap_object(const void * obj,size_t size)2359 void deregister_heap_object(const void *obj, size_t size) {
2360 }
2361
test_compilation_unit(bool (* test)(bool (*)(const void *,const std::string &)),bool (* test_a)(const void *,const std::string &),void (* calib)())2362 void test_compilation_unit(bool (*test)(bool (*)(const void *, const std::string &)),
2363 bool (*test_a)(const void *, const std::string &),
2364 void (*calib)()) {
2365 }
2366
2367 } // namespace Introspection
2368 } // namespace Internal
2369 } // namespace Halide
2370
2371 #endif
2372