1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30
31 // Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
32 // and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
33
34 #include "common/dwarf/dwarf2reader.h"
35
36 #include <assert.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <string.h>
40
41 #include <map>
42 #include <memory>
43 #include <stack>
44 #include <string>
45 #include <utility>
46
47 #include <sys/stat.h>
48
49 #include "common/dwarf/bytereader-inl.h"
50 #include "common/dwarf/bytereader.h"
51 #include "common/dwarf/line_state_machine.h"
52 #include "common/using_std_string.h"
53
54 namespace dwarf2reader {
55
CompilationUnit(const string & path,const SectionMap & sections,uint64 offset,ByteReader * reader,Dwarf2Handler * handler)56 CompilationUnit::CompilationUnit(const string& path,
57 const SectionMap& sections, uint64 offset,
58 ByteReader* reader, Dwarf2Handler* handler)
59 : path_(path), offset_from_section_start_(offset), reader_(reader),
60 sections_(sections), handler_(handler), abbrevs_(),
61 string_buffer_(NULL), string_buffer_length_(0),
62 str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
63 addr_buffer_(NULL), addr_buffer_length_(0),
64 is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
65 skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
66 have_checked_for_dwp_(false), dwp_path_(),
67 dwp_byte_reader_(), dwp_reader_() {}
68
69 // Initialize a compilation unit from a .dwo or .dwp file.
70 // In this case, we need the .debug_addr section from the
71 // executable file that contains the corresponding skeleton
72 // compilation unit. We also inherit the Dwarf2Handler from
73 // the executable file, and call it as if we were still
74 // processing the original compilation unit.
75
SetSplitDwarf(const uint8_t * addr_buffer,uint64 addr_buffer_length,uint64 addr_base,uint64 ranges_base,uint64 dwo_id)76 void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
77 uint64 addr_buffer_length,
78 uint64 addr_base,
79 uint64 ranges_base,
80 uint64 dwo_id) {
81 is_split_dwarf_ = true;
82 addr_buffer_ = addr_buffer;
83 addr_buffer_length_ = addr_buffer_length;
84 addr_base_ = addr_base;
85 ranges_base_ = ranges_base;
86 skeleton_dwo_id_ = dwo_id;
87 }
88
89 // Read a DWARF2/3 abbreviation section.
90 // Each abbrev consists of a abbreviation number, a tag, a byte
91 // specifying whether the tag has children, and a list of
92 // attribute/form pairs.
93 // The list of forms is terminated by a 0 for the attribute, and a
94 // zero for the form. The entire abbreviation section is terminated
95 // by a zero for the code.
96
ReadAbbrevs()97 void CompilationUnit::ReadAbbrevs() {
98 if (abbrevs_)
99 return;
100
101 // First get the debug_abbrev section. ".debug_abbrev" is the name
102 // recommended in the DWARF spec, and used on Linux;
103 // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
104 SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
105 if (iter == sections_.end())
106 iter = sections_.find("__debug_abbrev");
107 assert(iter != sections_.end());
108
109 abbrevs_ = new std::vector<Abbrev>;
110 abbrevs_->resize(1);
111
112 // The only way to check whether we are reading over the end of the
113 // buffer would be to first compute the size of the leb128 data by
114 // reading it, then go back and read it again.
115 const uint8_t *abbrev_start = iter->second.first +
116 header_.abbrev_offset;
117 const uint8_t *abbrevptr = abbrev_start;
118 #ifndef NDEBUG
119 const uint64 abbrev_length = iter->second.second - header_.abbrev_offset;
120 #endif
121
122 while (1) {
123 CompilationUnit::Abbrev abbrev;
124 size_t len;
125 const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
126
127 if (number == 0)
128 break;
129 abbrev.number = number;
130 abbrevptr += len;
131
132 assert(abbrevptr < abbrev_start + abbrev_length);
133 const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
134 abbrevptr += len;
135 abbrev.tag = static_cast<enum DwarfTag>(tag);
136
137 assert(abbrevptr < abbrev_start + abbrev_length);
138 abbrev.has_children = reader_->ReadOneByte(abbrevptr);
139 abbrevptr += 1;
140
141 assert(abbrevptr < abbrev_start + abbrev_length);
142
143 while (1) {
144 const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
145 abbrevptr += len;
146
147 assert(abbrevptr < abbrev_start + abbrev_length);
148 const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
149 abbrevptr += len;
150 if (nametemp == 0 && formtemp == 0)
151 break;
152
153 const enum DwarfAttribute name =
154 static_cast<enum DwarfAttribute>(nametemp);
155 const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp);
156 abbrev.attributes.push_back(std::make_pair(name, form));
157 }
158 assert(abbrev.number == abbrevs_->size());
159 abbrevs_->push_back(abbrev);
160 }
161 }
162
163 // Skips a single DIE's attributes.
SkipDIE(const uint8_t * start,const Abbrev & abbrev)164 const uint8_t *CompilationUnit::SkipDIE(const uint8_t* start,
165 const Abbrev& abbrev) {
166 for (AttributeList::const_iterator i = abbrev.attributes.begin();
167 i != abbrev.attributes.end();
168 i++) {
169 start = SkipAttribute(start, i->second);
170 }
171 return start;
172 }
173
174 // Skips a single attribute form's data.
SkipAttribute(const uint8_t * start,enum DwarfForm form)175 const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
176 enum DwarfForm form) {
177 size_t len;
178
179 switch (form) {
180 case DW_FORM_indirect:
181 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
182 &len));
183 start += len;
184 return SkipAttribute(start, form);
185
186 case DW_FORM_flag_present:
187 return start;
188 case DW_FORM_data1:
189 case DW_FORM_flag:
190 case DW_FORM_ref1:
191 return start + 1;
192 case DW_FORM_ref2:
193 case DW_FORM_data2:
194 return start + 2;
195 case DW_FORM_ref4:
196 case DW_FORM_data4:
197 return start + 4;
198 case DW_FORM_ref8:
199 case DW_FORM_data8:
200 case DW_FORM_ref_sig8:
201 return start + 8;
202 case DW_FORM_string:
203 return start + strlen(reinterpret_cast<const char *>(start)) + 1;
204 case DW_FORM_udata:
205 case DW_FORM_ref_udata:
206 case DW_FORM_GNU_str_index:
207 case DW_FORM_GNU_addr_index:
208 reader_->ReadUnsignedLEB128(start, &len);
209 return start + len;
210
211 case DW_FORM_sdata:
212 reader_->ReadSignedLEB128(start, &len);
213 return start + len;
214 case DW_FORM_addr:
215 return start + reader_->AddressSize();
216 case DW_FORM_ref_addr:
217 // DWARF2 and 3/4 differ on whether ref_addr is address size or
218 // offset size.
219 assert(header_.version >= 2);
220 if (header_.version == 2) {
221 return start + reader_->AddressSize();
222 } else if (header_.version >= 3) {
223 return start + reader_->OffsetSize();
224 }
225 break;
226
227 case DW_FORM_block1:
228 return start + 1 + reader_->ReadOneByte(start);
229 case DW_FORM_block2:
230 return start + 2 + reader_->ReadTwoBytes(start);
231 case DW_FORM_block4:
232 return start + 4 + reader_->ReadFourBytes(start);
233 case DW_FORM_block:
234 case DW_FORM_exprloc: {
235 uint64 size = reader_->ReadUnsignedLEB128(start, &len);
236 return start + size + len;
237 }
238 case DW_FORM_strp:
239 case DW_FORM_sec_offset:
240 return start + reader_->OffsetSize();
241 }
242 fprintf(stderr,"Unhandled form type");
243 return NULL;
244 }
245
246 // Read a DWARF2/3 header.
247 // The header is variable length in DWARF3 (and DWARF2 as extended by
248 // most compilers), and consists of an length field, a version number,
249 // the offset in the .debug_abbrev section for our abbrevs, and an
250 // address size.
ReadHeader()251 void CompilationUnit::ReadHeader() {
252 const uint8_t *headerptr = buffer_;
253 size_t initial_length_size;
254
255 assert(headerptr + 4 < buffer_ + buffer_length_);
256 const uint64 initial_length
257 = reader_->ReadInitialLength(headerptr, &initial_length_size);
258 headerptr += initial_length_size;
259 header_.length = initial_length;
260
261 assert(headerptr + 2 < buffer_ + buffer_length_);
262 header_.version = reader_->ReadTwoBytes(headerptr);
263 headerptr += 2;
264
265 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
266 header_.abbrev_offset = reader_->ReadOffset(headerptr);
267 headerptr += reader_->OffsetSize();
268
269 // Compare against less than or equal because this may be the last
270 // section in the file.
271 assert(headerptr + 1 <= buffer_ + buffer_length_);
272 header_.address_size = reader_->ReadOneByte(headerptr);
273 reader_->SetAddressSize(header_.address_size);
274 headerptr += 1;
275
276 after_header_ = headerptr;
277
278 // This check ensures that we don't have to do checking during the
279 // reading of DIEs. header_.length does not include the size of the
280 // initial length.
281 assert(buffer_ + initial_length_size + header_.length <=
282 buffer_ + buffer_length_);
283 }
284
Start()285 uint64 CompilationUnit::Start() {
286 // First get the debug_info section. ".debug_info" is the name
287 // recommended in the DWARF spec, and used on Linux; "__debug_info"
288 // is the name used in Mac OS X Mach-O files.
289 SectionMap::const_iterator iter = sections_.find(".debug_info");
290 if (iter == sections_.end())
291 iter = sections_.find("__debug_info");
292 assert(iter != sections_.end());
293
294 // Set up our buffer
295 buffer_ = iter->second.first + offset_from_section_start_;
296 buffer_length_ = iter->second.second - offset_from_section_start_;
297
298 // Read the header
299 ReadHeader();
300
301 // Figure out the real length from the end of the initial length to
302 // the end of the compilation unit, since that is the value we
303 // return.
304 uint64 ourlength = header_.length;
305 if (reader_->OffsetSize() == 8)
306 ourlength += 12;
307 else
308 ourlength += 4;
309
310 // See if the user wants this compilation unit, and if not, just return.
311 if (!handler_->StartCompilationUnit(offset_from_section_start_,
312 reader_->AddressSize(),
313 reader_->OffsetSize(),
314 header_.length,
315 header_.version))
316 return ourlength;
317
318 // Otherwise, continue by reading our abbreviation entries.
319 ReadAbbrevs();
320
321 // Set the string section if we have one. ".debug_str" is the name
322 // recommended in the DWARF spec, and used on Linux; "__debug_str"
323 // is the name used in Mac OS X Mach-O files.
324 iter = sections_.find(".debug_str");
325 if (iter == sections_.end())
326 iter = sections_.find("__debug_str");
327 if (iter != sections_.end()) {
328 string_buffer_ = iter->second.first;
329 string_buffer_length_ = iter->second.second;
330 }
331
332 // Set the string offsets section if we have one.
333 iter = sections_.find(".debug_str_offsets");
334 if (iter != sections_.end()) {
335 str_offsets_buffer_ = iter->second.first;
336 str_offsets_buffer_length_ = iter->second.second;
337 }
338
339 // Set the address section if we have one.
340 iter = sections_.find(".debug_addr");
341 if (iter != sections_.end()) {
342 addr_buffer_ = iter->second.first;
343 addr_buffer_length_ = iter->second.second;
344 }
345
346 // Now that we have our abbreviations, start processing DIE's.
347 ProcessDIEs();
348
349 // If this is a skeleton compilation unit generated with split DWARF,
350 // and the client needs the full debug info, we need to find the full
351 // compilation unit in a .dwo or .dwp file.
352 if (!is_split_dwarf_
353 && dwo_name_ != NULL
354 && handler_->NeedSplitDebugInfo())
355 ProcessSplitDwarf();
356
357 return ourlength;
358 }
359
360 // If one really wanted, you could merge SkipAttribute and
361 // ProcessAttribute
362 // This is all boring data manipulation and calling of the handler.
ProcessAttribute(uint64 dieoffset,const uint8_t * start,enum DwarfAttribute attr,enum DwarfForm form)363 const uint8_t *CompilationUnit::ProcessAttribute(
364 uint64 dieoffset, const uint8_t *start, enum DwarfAttribute attr,
365 enum DwarfForm form) {
366 size_t len;
367
368 switch (form) {
369 // DW_FORM_indirect is never used because it is such a space
370 // waster.
371 case DW_FORM_indirect:
372 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
373 &len));
374 start += len;
375 return ProcessAttribute(dieoffset, start, attr, form);
376
377 case DW_FORM_flag_present:
378 ProcessAttributeUnsigned(dieoffset, attr, form, 1);
379 return start;
380 case DW_FORM_data1:
381 case DW_FORM_flag:
382 ProcessAttributeUnsigned(dieoffset, attr, form,
383 reader_->ReadOneByte(start));
384 return start + 1;
385 case DW_FORM_data2:
386 ProcessAttributeUnsigned(dieoffset, attr, form,
387 reader_->ReadTwoBytes(start));
388 return start + 2;
389 case DW_FORM_data4:
390 ProcessAttributeUnsigned(dieoffset, attr, form,
391 reader_->ReadFourBytes(start));
392 return start + 4;
393 case DW_FORM_data8:
394 ProcessAttributeUnsigned(dieoffset, attr, form,
395 reader_->ReadEightBytes(start));
396 return start + 8;
397 case DW_FORM_string: {
398 const char *str = reinterpret_cast<const char *>(start);
399 ProcessAttributeString(dieoffset, attr, form, str);
400 return start + strlen(str) + 1;
401 }
402 case DW_FORM_udata:
403 ProcessAttributeUnsigned(dieoffset, attr, form,
404 reader_->ReadUnsignedLEB128(start, &len));
405 return start + len;
406
407 case DW_FORM_sdata:
408 ProcessAttributeSigned(dieoffset, attr, form,
409 reader_->ReadSignedLEB128(start, &len));
410 return start + len;
411 case DW_FORM_addr:
412 ProcessAttributeUnsigned(dieoffset, attr, form,
413 reader_->ReadAddress(start));
414 return start + reader_->AddressSize();
415 case DW_FORM_sec_offset:
416 ProcessAttributeUnsigned(dieoffset, attr, form,
417 reader_->ReadOffset(start));
418 return start + reader_->OffsetSize();
419
420 case DW_FORM_ref1:
421 handler_->ProcessAttributeReference(dieoffset, attr, form,
422 reader_->ReadOneByte(start)
423 + offset_from_section_start_);
424 return start + 1;
425 case DW_FORM_ref2:
426 handler_->ProcessAttributeReference(dieoffset, attr, form,
427 reader_->ReadTwoBytes(start)
428 + offset_from_section_start_);
429 return start + 2;
430 case DW_FORM_ref4:
431 handler_->ProcessAttributeReference(dieoffset, attr, form,
432 reader_->ReadFourBytes(start)
433 + offset_from_section_start_);
434 return start + 4;
435 case DW_FORM_ref8:
436 handler_->ProcessAttributeReference(dieoffset, attr, form,
437 reader_->ReadEightBytes(start)
438 + offset_from_section_start_);
439 return start + 8;
440 case DW_FORM_ref_udata:
441 handler_->ProcessAttributeReference(dieoffset, attr, form,
442 reader_->ReadUnsignedLEB128(start,
443 &len)
444 + offset_from_section_start_);
445 return start + len;
446 case DW_FORM_ref_addr:
447 // DWARF2 and 3/4 differ on whether ref_addr is address size or
448 // offset size.
449 assert(header_.version >= 2);
450 if (header_.version == 2) {
451 handler_->ProcessAttributeReference(dieoffset, attr, form,
452 reader_->ReadAddress(start));
453 return start + reader_->AddressSize();
454 } else if (header_.version >= 3) {
455 handler_->ProcessAttributeReference(dieoffset, attr, form,
456 reader_->ReadOffset(start));
457 return start + reader_->OffsetSize();
458 }
459 break;
460 case DW_FORM_ref_sig8:
461 handler_->ProcessAttributeSignature(dieoffset, attr, form,
462 reader_->ReadEightBytes(start));
463 return start + 8;
464
465 case DW_FORM_block1: {
466 uint64 datalen = reader_->ReadOneByte(start);
467 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
468 datalen);
469 return start + 1 + datalen;
470 }
471 case DW_FORM_block2: {
472 uint64 datalen = reader_->ReadTwoBytes(start);
473 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
474 datalen);
475 return start + 2 + datalen;
476 }
477 case DW_FORM_block4: {
478 uint64 datalen = reader_->ReadFourBytes(start);
479 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
480 datalen);
481 return start + 4 + datalen;
482 }
483 case DW_FORM_block:
484 case DW_FORM_exprloc: {
485 uint64 datalen = reader_->ReadUnsignedLEB128(start, &len);
486 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
487 datalen);
488 return start + datalen + len;
489 }
490 case DW_FORM_strp: {
491 assert(string_buffer_ != NULL);
492
493 const uint64 offset = reader_->ReadOffset(start);
494 assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
495
496 const char *str = reinterpret_cast<const char *>(string_buffer_ + offset);
497 ProcessAttributeString(dieoffset, attr, form, str);
498 return start + reader_->OffsetSize();
499 }
500
501 case DW_FORM_GNU_str_index: {
502 uint64 str_index = reader_->ReadUnsignedLEB128(start, &len);
503 const uint8_t* offset_ptr =
504 str_offsets_buffer_ + str_index * reader_->OffsetSize();
505 const uint64 offset = reader_->ReadOffset(offset_ptr);
506 if (offset >= string_buffer_length_) {
507 return NULL;
508 }
509
510 const char* str = reinterpret_cast<const char *>(string_buffer_) + offset;
511 ProcessAttributeString(dieoffset, attr, form, str);
512 return start + len;
513 break;
514 }
515 case DW_FORM_GNU_addr_index: {
516 uint64 addr_index = reader_->ReadUnsignedLEB128(start, &len);
517 const uint8_t* addr_ptr =
518 addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
519 ProcessAttributeUnsigned(dieoffset, attr, form,
520 reader_->ReadAddress(addr_ptr));
521 return start + len;
522 }
523 }
524 fprintf(stderr, "Unhandled form type\n");
525 return NULL;
526 }
527
ProcessDIE(uint64 dieoffset,const uint8_t * start,const Abbrev & abbrev)528 const uint8_t *CompilationUnit::ProcessDIE(uint64 dieoffset,
529 const uint8_t *start,
530 const Abbrev& abbrev) {
531 for (AttributeList::const_iterator i = abbrev.attributes.begin();
532 i != abbrev.attributes.end();
533 i++) {
534 start = ProcessAttribute(dieoffset, start, i->first, i->second);
535 }
536
537 // If this is a compilation unit in a split DWARF object, verify that
538 // the dwo_id matches. If it does not match, we will ignore this
539 // compilation unit.
540 if (abbrev.tag == DW_TAG_compile_unit
541 && is_split_dwarf_
542 && dwo_id_ != skeleton_dwo_id_) {
543 return NULL;
544 }
545
546 return start;
547 }
548
ProcessDIEs()549 void CompilationUnit::ProcessDIEs() {
550 const uint8_t *dieptr = after_header_;
551 size_t len;
552
553 // lengthstart is the place the length field is based on.
554 // It is the point in the header after the initial length field
555 const uint8_t *lengthstart = buffer_;
556
557 // In 64 bit dwarf, the initial length is 12 bytes, because of the
558 // 0xffffffff at the start.
559 if (reader_->OffsetSize() == 8)
560 lengthstart += 12;
561 else
562 lengthstart += 4;
563
564 std::stack<uint64> die_stack;
565
566 while (dieptr < (lengthstart + header_.length)) {
567 // We give the user the absolute offset from the beginning of
568 // debug_info, since they need it to deal with ref_addr forms.
569 uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
570
571 uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
572
573 dieptr += len;
574
575 // Abbrev == 0 represents the end of a list of children, or padding
576 // at the end of the compilation unit.
577 if (abbrev_num == 0) {
578 if (die_stack.size() == 0)
579 // If it is padding, then we are done with the compilation unit's DIEs.
580 return;
581 const uint64 offset = die_stack.top();
582 die_stack.pop();
583 handler_->EndDIE(offset);
584 continue;
585 }
586
587 const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
588 const enum DwarfTag tag = abbrev.tag;
589 if (!handler_->StartDIE(absolute_offset, tag)) {
590 dieptr = SkipDIE(dieptr, abbrev);
591 } else {
592 dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
593 }
594
595 if (abbrev.has_children) {
596 die_stack.push(absolute_offset);
597 } else {
598 handler_->EndDIE(absolute_offset);
599 }
600 }
601 }
602
603 // Check for a valid ELF file and return the Address size.
604 // Returns 0 if not a valid ELF file.
GetElfWidth(const ElfReader & elf)605 inline int GetElfWidth(const ElfReader& elf) {
606 if (elf.IsElf32File())
607 return 4;
608 if (elf.IsElf64File())
609 return 8;
610 return 0;
611 }
612
ProcessSplitDwarf()613 void CompilationUnit::ProcessSplitDwarf() {
614 struct stat statbuf;
615 if (!have_checked_for_dwp_) {
616 // Look for a .dwp file in the same directory as the executable.
617 have_checked_for_dwp_ = true;
618 string dwp_suffix(".dwp");
619 dwp_path_ = path_ + dwp_suffix;
620 if (stat(dwp_path_.c_str(), &statbuf) != 0) {
621 // Fall back to a split .debug file in the same directory.
622 string debug_suffix(".debug");
623 dwp_path_ = path_;
624 size_t found = path_.rfind(debug_suffix);
625 if (found + debug_suffix.length() == path_.length())
626 dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
627 }
628 if (stat(dwp_path_.c_str(), &statbuf) == 0) {
629 ElfReader* elf = new ElfReader(dwp_path_);
630 int width = GetElfWidth(*elf);
631 if (width != 0) {
632 dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness()));
633 dwp_byte_reader_->SetAddressSize(width);
634 dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf));
635 dwp_reader_->Initialize();
636 } else {
637 delete elf;
638 }
639 }
640 }
641 bool found_in_dwp = false;
642 if (dwp_reader_) {
643 // If we have a .dwp file, read the debug sections for the requested CU.
644 SectionMap sections;
645 dwp_reader_->ReadDebugSectionsForCU(dwo_id_, §ions);
646 if (!sections.empty()) {
647 found_in_dwp = true;
648 CompilationUnit dwp_comp_unit(dwp_path_, sections, 0,
649 dwp_byte_reader_.get(), handler_);
650 dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
651 ranges_base_, dwo_id_);
652 dwp_comp_unit.Start();
653 }
654 }
655 if (!found_in_dwp) {
656 // If no .dwp file, try to open the .dwo file.
657 if (stat(dwo_name_, &statbuf) == 0) {
658 ElfReader elf(dwo_name_);
659 int width = GetElfWidth(elf);
660 if (width != 0) {
661 ByteReader reader(ENDIANNESS_LITTLE);
662 reader.SetAddressSize(width);
663 SectionMap sections;
664 ReadDebugSectionsFromDwo(&elf, §ions);
665 CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader,
666 handler_);
667 dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
668 addr_base_, ranges_base_, dwo_id_);
669 dwo_comp_unit.Start();
670 }
671 }
672 }
673 }
674
ReadDebugSectionsFromDwo(ElfReader * elf_reader,SectionMap * sections)675 void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
676 SectionMap* sections) {
677 static const char* const section_names[] = {
678 ".debug_abbrev",
679 ".debug_info",
680 ".debug_str_offsets",
681 ".debug_str"
682 };
683 for (unsigned int i = 0u;
684 i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
685 string base_name = section_names[i];
686 string dwo_name = base_name + ".dwo";
687 size_t section_size;
688 const char* section_data = elf_reader->GetSectionByName(dwo_name,
689 §ion_size);
690 if (section_data != NULL)
691 sections->insert(std::make_pair(
692 base_name, std::make_pair(
693 reinterpret_cast<const uint8_t *>(section_data),
694 section_size)));
695 }
696 }
697
DwpReader(const ByteReader & byte_reader,ElfReader * elf_reader)698 DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
699 : elf_reader_(elf_reader), byte_reader_(byte_reader),
700 cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
701 string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
702 nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
703 offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
704 abbrev_size_(0), info_data_(NULL), info_size_(0),
705 str_offsets_data_(NULL), str_offsets_size_(0) {}
706
~DwpReader()707 DwpReader::~DwpReader() {
708 if (elf_reader_) delete elf_reader_;
709 }
710
Initialize()711 void DwpReader::Initialize() {
712 cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
713 &cu_index_size_);
714 if (cu_index_ == NULL) {
715 return;
716 }
717 // The .debug_str.dwo section is shared by all CUs in the file.
718 string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
719 &string_buffer_size_);
720
721 version_ = byte_reader_.ReadFourBytes(
722 reinterpret_cast<const uint8_t *>(cu_index_));
723
724 if (version_ == 1) {
725 nslots_ = byte_reader_.ReadFourBytes(
726 reinterpret_cast<const uint8_t *>(cu_index_)
727 + 3 * sizeof(uint32));
728 phash_ = cu_index_ + 4 * sizeof(uint32);
729 pindex_ = phash_ + nslots_ * sizeof(uint64);
730 shndx_pool_ = pindex_ + nslots_ * sizeof(uint32);
731 if (shndx_pool_ >= cu_index_ + cu_index_size_) {
732 version_ = 0;
733 }
734 } else if (version_ == 2) {
735 ncolumns_ = byte_reader_.ReadFourBytes(
736 reinterpret_cast<const uint8_t *>(cu_index_) + sizeof(uint32));
737 nunits_ = byte_reader_.ReadFourBytes(
738 reinterpret_cast<const uint8_t *>(cu_index_) + 2 * sizeof(uint32));
739 nslots_ = byte_reader_.ReadFourBytes(
740 reinterpret_cast<const uint8_t *>(cu_index_) + 3 * sizeof(uint32));
741 phash_ = cu_index_ + 4 * sizeof(uint32);
742 pindex_ = phash_ + nslots_ * sizeof(uint64);
743 offset_table_ = pindex_ + nslots_ * sizeof(uint32);
744 size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32);
745 abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
746 &abbrev_size_);
747 info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
748 str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
749 &str_offsets_size_);
750 if (size_table_ >= cu_index_ + cu_index_size_) {
751 version_ = 0;
752 }
753 }
754 }
755
ReadDebugSectionsForCU(uint64 dwo_id,SectionMap * sections)756 void DwpReader::ReadDebugSectionsForCU(uint64 dwo_id,
757 SectionMap* sections) {
758 if (version_ == 1) {
759 int slot = LookupCU(dwo_id);
760 if (slot == -1) {
761 return;
762 }
763
764 // The index table points to the section index pool, where we
765 // can read a list of section indexes for the debug sections
766 // for the CU whose dwo_id we are looking for.
767 int index = byte_reader_.ReadFourBytes(
768 reinterpret_cast<const uint8_t *>(pindex_)
769 + slot * sizeof(uint32));
770 const char* shndx_list = shndx_pool_ + index * sizeof(uint32);
771 for (;;) {
772 if (shndx_list >= cu_index_ + cu_index_size_) {
773 version_ = 0;
774 return;
775 }
776 unsigned int shndx = byte_reader_.ReadFourBytes(
777 reinterpret_cast<const uint8_t *>(shndx_list));
778 shndx_list += sizeof(uint32);
779 if (shndx == 0)
780 break;
781 const char* section_name = elf_reader_->GetSectionName(shndx);
782 size_t section_size;
783 const char* section_data;
784 // We're only interested in these four debug sections.
785 // The section names in the .dwo file end with ".dwo", but we
786 // add them to the sections table with their normal names.
787 if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
788 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
789 sections->insert(std::make_pair(
790 ".debug_abbrev",
791 std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
792 section_size)));
793 } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
794 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
795 sections->insert(std::make_pair(
796 ".debug_info",
797 std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
798 section_size)));
799 } else if (!strncmp(section_name, ".debug_str_offsets",
800 strlen(".debug_str_offsets"))) {
801 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
802 sections->insert(std::make_pair(
803 ".debug_str_offsets",
804 std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
805 section_size)));
806 }
807 }
808 sections->insert(std::make_pair(
809 ".debug_str",
810 std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
811 string_buffer_size_)));
812 } else if (version_ == 2) {
813 uint32 index = LookupCUv2(dwo_id);
814 if (index == 0) {
815 return;
816 }
817
818 // The index points to a row in each of the section offsets table
819 // and the section size table, where we can read the offsets and sizes
820 // of the contributions to each debug section from the CU whose dwo_id
821 // we are looking for. Row 0 of the section offsets table has the
822 // section ids for each column of the table. The size table begins
823 // with row 1.
824 const char* id_row = offset_table_;
825 const char* offset_row = offset_table_
826 + index * ncolumns_ * sizeof(uint32);
827 const char* size_row =
828 size_table_ + (index - 1) * ncolumns_ * sizeof(uint32);
829 if (size_row + ncolumns_ * sizeof(uint32) > cu_index_ + cu_index_size_) {
830 version_ = 0;
831 return;
832 }
833 for (unsigned int col = 0u; col < ncolumns_; ++col) {
834 uint32 section_id =
835 byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t *>(id_row)
836 + col * sizeof(uint32));
837 uint32 offset = byte_reader_.ReadFourBytes(
838 reinterpret_cast<const uint8_t *>(offset_row)
839 + col * sizeof(uint32));
840 uint32 size = byte_reader_.ReadFourBytes(
841 reinterpret_cast<const uint8_t *>(size_row) + col * sizeof(uint32));
842 if (section_id == DW_SECT_ABBREV) {
843 sections->insert(std::make_pair(
844 ".debug_abbrev",
845 std::make_pair(reinterpret_cast<const uint8_t *> (abbrev_data_)
846 + offset, size)));
847 } else if (section_id == DW_SECT_INFO) {
848 sections->insert(std::make_pair(
849 ".debug_info",
850 std::make_pair(reinterpret_cast<const uint8_t *> (info_data_)
851 + offset, size)));
852 } else if (section_id == DW_SECT_STR_OFFSETS) {
853 sections->insert(std::make_pair(
854 ".debug_str_offsets",
855 std::make_pair(reinterpret_cast<const uint8_t *> (str_offsets_data_)
856 + offset, size)));
857 }
858 }
859 sections->insert(std::make_pair(
860 ".debug_str",
861 std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
862 string_buffer_size_)));
863 }
864 }
865
LookupCU(uint64 dwo_id)866 int DwpReader::LookupCU(uint64 dwo_id) {
867 uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1);
868 uint64 probe = byte_reader_.ReadEightBytes(
869 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
870 if (probe != 0 && probe != dwo_id) {
871 uint32 secondary_hash =
872 (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1;
873 do {
874 slot = (slot + secondary_hash) & (nslots_ - 1);
875 probe = byte_reader_.ReadEightBytes(
876 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
877 } while (probe != 0 && probe != dwo_id);
878 }
879 if (probe == 0)
880 return -1;
881 return slot;
882 }
883
LookupCUv2(uint64 dwo_id)884 uint32 DwpReader::LookupCUv2(uint64 dwo_id) {
885 uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1);
886 uint64 probe = byte_reader_.ReadEightBytes(
887 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
888 uint32 index = byte_reader_.ReadFourBytes(
889 reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32));
890 if (index != 0 && probe != dwo_id) {
891 uint32 secondary_hash =
892 (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1;
893 do {
894 slot = (slot + secondary_hash) & (nslots_ - 1);
895 probe = byte_reader_.ReadEightBytes(
896 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
897 index = byte_reader_.ReadFourBytes(
898 reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32));
899 } while (index != 0 && probe != dwo_id);
900 }
901 return index;
902 }
903
LineInfo(const uint8_t * buffer,uint64 buffer_length,ByteReader * reader,LineInfoHandler * handler)904 LineInfo::LineInfo(const uint8_t *buffer, uint64 buffer_length,
905 ByteReader* reader, LineInfoHandler* handler):
906 handler_(handler), reader_(reader), buffer_(buffer) {
907 #ifndef NDEBUG
908 buffer_length_ = buffer_length;
909 #endif
910 header_.std_opcode_lengths = NULL;
911 }
912
Start()913 uint64 LineInfo::Start() {
914 ReadHeader();
915 ReadLines();
916 return after_header_ - buffer_;
917 }
918
919 // The header for a debug_line section is mildly complicated, because
920 // the line info is very tightly encoded.
ReadHeader()921 void LineInfo::ReadHeader() {
922 const uint8_t *lineptr = buffer_;
923 size_t initial_length_size;
924
925 const uint64 initial_length
926 = reader_->ReadInitialLength(lineptr, &initial_length_size);
927
928 lineptr += initial_length_size;
929 header_.total_length = initial_length;
930 assert(buffer_ + initial_length_size + header_.total_length <=
931 buffer_ + buffer_length_);
932
933 // Address size *must* be set by CU ahead of time.
934 assert(reader_->AddressSize() != 0);
935
936 header_.version = reader_->ReadTwoBytes(lineptr);
937 lineptr += 2;
938
939 header_.prologue_length = reader_->ReadOffset(lineptr);
940 lineptr += reader_->OffsetSize();
941
942 header_.min_insn_length = reader_->ReadOneByte(lineptr);
943 lineptr += 1;
944
945 if (header_.version >= 4) {
946 __attribute__((unused)) uint8 max_ops_per_insn =
947 reader_->ReadOneByte(lineptr);
948 ++lineptr;
949 assert(max_ops_per_insn == 1);
950 }
951
952 header_.default_is_stmt = reader_->ReadOneByte(lineptr);
953 lineptr += 1;
954
955 header_.line_base = *reinterpret_cast<const int8*>(lineptr);
956 lineptr += 1;
957
958 header_.line_range = reader_->ReadOneByte(lineptr);
959 lineptr += 1;
960
961 header_.opcode_base = reader_->ReadOneByte(lineptr);
962 lineptr += 1;
963
964 header_.std_opcode_lengths = new std::vector<unsigned char>;
965 header_.std_opcode_lengths->resize(header_.opcode_base + 1);
966 (*header_.std_opcode_lengths)[0] = 0;
967 for (int i = 1; i < header_.opcode_base; i++) {
968 (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
969 lineptr += 1;
970 }
971
972 // It is legal for the directory entry table to be empty.
973 if (*lineptr) {
974 uint32 dirindex = 1;
975 while (*lineptr) {
976 const char *dirname = reinterpret_cast<const char *>(lineptr);
977 handler_->DefineDir(dirname, dirindex);
978 lineptr += strlen(dirname) + 1;
979 dirindex++;
980 }
981 }
982 lineptr++;
983
984 // It is also legal for the file entry table to be empty.
985 if (*lineptr) {
986 uint32 fileindex = 1;
987 size_t len;
988 while (*lineptr) {
989 const char *filename = reinterpret_cast<const char *>(lineptr);
990 lineptr += strlen(filename) + 1;
991
992 uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
993 lineptr += len;
994
995 uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
996 lineptr += len;
997
998 uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
999 lineptr += len;
1000 handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex),
1001 mod_time, filelength);
1002 fileindex++;
1003 }
1004 }
1005 lineptr++;
1006
1007 after_header_ = lineptr;
1008 }
1009
1010 /* static */
ProcessOneOpcode(ByteReader * reader,LineInfoHandler * handler,const struct LineInfoHeader & header,const uint8_t * start,struct LineStateMachine * lsm,size_t * len,uintptr pc,bool * lsm_passes_pc)1011 bool LineInfo::ProcessOneOpcode(ByteReader* reader,
1012 LineInfoHandler* handler,
1013 const struct LineInfoHeader &header,
1014 const uint8_t *start,
1015 struct LineStateMachine* lsm,
1016 size_t* len,
1017 uintptr pc,
1018 bool *lsm_passes_pc) {
1019 size_t oplen = 0;
1020 size_t templen;
1021 uint8 opcode = reader->ReadOneByte(start);
1022 oplen++;
1023 start++;
1024
1025 // If the opcode is great than the opcode_base, it is a special
1026 // opcode. Most line programs consist mainly of special opcodes.
1027 if (opcode >= header.opcode_base) {
1028 opcode -= header.opcode_base;
1029 const int64 advance_address = (opcode / header.line_range)
1030 * header.min_insn_length;
1031 const int32 advance_line = (opcode % header.line_range)
1032 + header.line_base;
1033
1034 // Check if the lsm passes "pc". If so, mark it as passed.
1035 if (lsm_passes_pc &&
1036 lsm->address <= pc && pc < lsm->address + advance_address) {
1037 *lsm_passes_pc = true;
1038 }
1039
1040 lsm->address += advance_address;
1041 lsm->line_num += advance_line;
1042 lsm->basic_block = true;
1043 *len = oplen;
1044 return true;
1045 }
1046
1047 // Otherwise, we have the regular opcodes
1048 switch (opcode) {
1049 case DW_LNS_copy: {
1050 lsm->basic_block = false;
1051 *len = oplen;
1052 return true;
1053 }
1054
1055 case DW_LNS_advance_pc: {
1056 uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen);
1057 oplen += templen;
1058
1059 // Check if the lsm passes "pc". If so, mark it as passed.
1060 if (lsm_passes_pc && lsm->address <= pc &&
1061 pc < lsm->address + header.min_insn_length * advance_address) {
1062 *lsm_passes_pc = true;
1063 }
1064
1065 lsm->address += header.min_insn_length * advance_address;
1066 }
1067 break;
1068 case DW_LNS_advance_line: {
1069 const int64 advance_line = reader->ReadSignedLEB128(start, &templen);
1070 oplen += templen;
1071 lsm->line_num += static_cast<int32>(advance_line);
1072
1073 // With gcc 4.2.1, we can get the line_no here for the first time
1074 // since DW_LNS_advance_line is called after DW_LNE_set_address is
1075 // called. So we check if the lsm passes "pc" here, not in
1076 // DW_LNE_set_address.
1077 if (lsm_passes_pc && lsm->address == pc) {
1078 *lsm_passes_pc = true;
1079 }
1080 }
1081 break;
1082 case DW_LNS_set_file: {
1083 const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen);
1084 oplen += templen;
1085 lsm->file_num = static_cast<uint32>(fileno);
1086 }
1087 break;
1088 case DW_LNS_set_column: {
1089 const uint64 colno = reader->ReadUnsignedLEB128(start, &templen);
1090 oplen += templen;
1091 lsm->column_num = static_cast<uint32>(colno);
1092 }
1093 break;
1094 case DW_LNS_negate_stmt: {
1095 lsm->is_stmt = !lsm->is_stmt;
1096 }
1097 break;
1098 case DW_LNS_set_basic_block: {
1099 lsm->basic_block = true;
1100 }
1101 break;
1102 case DW_LNS_fixed_advance_pc: {
1103 const uint16 advance_address = reader->ReadTwoBytes(start);
1104 oplen += 2;
1105
1106 // Check if the lsm passes "pc". If so, mark it as passed.
1107 if (lsm_passes_pc &&
1108 lsm->address <= pc && pc < lsm->address + advance_address) {
1109 *lsm_passes_pc = true;
1110 }
1111
1112 lsm->address += advance_address;
1113 }
1114 break;
1115 case DW_LNS_const_add_pc: {
1116 const int64 advance_address = header.min_insn_length
1117 * ((255 - header.opcode_base)
1118 / header.line_range);
1119
1120 // Check if the lsm passes "pc". If so, mark it as passed.
1121 if (lsm_passes_pc &&
1122 lsm->address <= pc && pc < lsm->address + advance_address) {
1123 *lsm_passes_pc = true;
1124 }
1125
1126 lsm->address += advance_address;
1127 }
1128 break;
1129 case DW_LNS_extended_op: {
1130 const uint64 extended_op_len = reader->ReadUnsignedLEB128(start,
1131 &templen);
1132 start += templen;
1133 oplen += templen + extended_op_len;
1134
1135 const uint64 extended_op = reader->ReadOneByte(start);
1136 start++;
1137
1138 switch (extended_op) {
1139 case DW_LNE_end_sequence: {
1140 lsm->end_sequence = true;
1141 *len = oplen;
1142 return true;
1143 }
1144 break;
1145 case DW_LNE_set_address: {
1146 // With gcc 4.2.1, we cannot tell the line_no here since
1147 // DW_LNE_set_address is called before DW_LNS_advance_line is
1148 // called. So we do not check if the lsm passes "pc" here. See
1149 // also the comment in DW_LNS_advance_line.
1150 uint64 address = reader->ReadAddress(start);
1151 lsm->address = address;
1152 }
1153 break;
1154 case DW_LNE_define_file: {
1155 const char *filename = reinterpret_cast<const char *>(start);
1156
1157 templen = strlen(filename) + 1;
1158 start += templen;
1159
1160 uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen);
1161 oplen += templen;
1162
1163 const uint64 mod_time = reader->ReadUnsignedLEB128(start,
1164 &templen);
1165 oplen += templen;
1166
1167 const uint64 filelength = reader->ReadUnsignedLEB128(start,
1168 &templen);
1169 oplen += templen;
1170
1171 if (handler) {
1172 handler->DefineFile(filename, -1, static_cast<uint32>(dirindex),
1173 mod_time, filelength);
1174 }
1175 }
1176 break;
1177 }
1178 }
1179 break;
1180
1181 default: {
1182 // Ignore unknown opcode silently
1183 if (header.std_opcode_lengths) {
1184 for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
1185 reader->ReadUnsignedLEB128(start, &templen);
1186 start += templen;
1187 oplen += templen;
1188 }
1189 }
1190 }
1191 break;
1192 }
1193 *len = oplen;
1194 return false;
1195 }
1196
ReadLines()1197 void LineInfo::ReadLines() {
1198 struct LineStateMachine lsm;
1199
1200 // lengthstart is the place the length field is based on.
1201 // It is the point in the header after the initial length field
1202 const uint8_t *lengthstart = buffer_;
1203
1204 // In 64 bit dwarf, the initial length is 12 bytes, because of the
1205 // 0xffffffff at the start.
1206 if (reader_->OffsetSize() == 8)
1207 lengthstart += 12;
1208 else
1209 lengthstart += 4;
1210
1211 const uint8_t *lineptr = after_header_;
1212 lsm.Reset(header_.default_is_stmt);
1213
1214 // The LineInfoHandler interface expects each line's length along
1215 // with its address, but DWARF only provides addresses (sans
1216 // length), and an end-of-sequence address; one infers the length
1217 // from the next address. So we report a line only when we get the
1218 // next line's address, or the end-of-sequence address.
1219 bool have_pending_line = false;
1220 uint64 pending_address = 0;
1221 uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
1222
1223 while (lineptr < lengthstart + header_.total_length) {
1224 size_t oplength;
1225 bool add_row = ProcessOneOpcode(reader_, handler_, header_,
1226 lineptr, &lsm, &oplength, (uintptr)-1,
1227 NULL);
1228 if (add_row) {
1229 if (have_pending_line)
1230 handler_->AddLine(pending_address, lsm.address - pending_address,
1231 pending_file_num, pending_line_num,
1232 pending_column_num);
1233 if (lsm.end_sequence) {
1234 lsm.Reset(header_.default_is_stmt);
1235 have_pending_line = false;
1236 } else {
1237 pending_address = lsm.address;
1238 pending_file_num = lsm.file_num;
1239 pending_line_num = lsm.line_num;
1240 pending_column_num = lsm.column_num;
1241 have_pending_line = true;
1242 }
1243 }
1244 lineptr += oplength;
1245 }
1246
1247 after_header_ = lengthstart + header_.total_length;
1248 }
1249
RangeListReader(const uint8_t * buffer,uint64 size,ByteReader * reader,RangeListHandler * handler)1250 RangeListReader::RangeListReader(const uint8_t *buffer, uint64 size,
1251 ByteReader *reader, RangeListHandler *handler)
1252 : buffer_(buffer), size_(size), reader_(reader), handler_(handler) { }
1253
ReadRangeList(uint64 offset)1254 bool RangeListReader::ReadRangeList(uint64 offset) {
1255 const uint64 max_address =
1256 (reader_->AddressSize() == 4) ? 0xffffffffUL
1257 : 0xffffffffffffffffULL;
1258 const uint64 entry_size = reader_->AddressSize() * 2;
1259 bool list_end = false;
1260
1261 do {
1262 if (offset > size_ - entry_size) {
1263 return false; // Invalid range detected
1264 }
1265
1266 uint64 start_address = reader_->ReadAddress(buffer_ + offset);
1267 uint64 end_address =
1268 reader_->ReadAddress(buffer_ + offset + reader_->AddressSize());
1269
1270 if (start_address == max_address) { // Base address selection
1271 handler_->SetBaseAddress(end_address);
1272 } else if (start_address == 0 && end_address == 0) { // End-of-list
1273 handler_->Finish();
1274 list_end = true;
1275 } else { // Add a range entry
1276 handler_->AddRange(start_address, end_address);
1277 }
1278
1279 offset += entry_size;
1280 } while (!list_end);
1281
1282 return true;
1283 }
1284
1285 // A DWARF rule for recovering the address or value of a register, or
1286 // computing the canonical frame address. There is one subclass of this for
1287 // each '*Rule' member function in CallFrameInfo::Handler.
1288 //
1289 // It's annoying that we have to handle Rules using pointers (because
1290 // the concrete instances can have an arbitrary size). They're small,
1291 // so it would be much nicer if we could just handle them by value
1292 // instead of fretting about ownership and destruction.
1293 //
1294 // It seems like all these could simply be instances of std::tr1::bind,
1295 // except that we need instances to be EqualityComparable, too.
1296 //
1297 // This could logically be nested within State, but then the qualified names
1298 // get horrendous.
1299 class CallFrameInfo::Rule {
1300 public:
~Rule()1301 virtual ~Rule() { }
1302
1303 // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
1304 // this rule. If REG is kCFARegister, then this rule describes how to compute
1305 // the canonical frame address. Return what the HANDLER member function
1306 // returned.
1307 virtual bool Handle(Handler *handler,
1308 uint64 address, int reg) const = 0;
1309
1310 // Equality on rules. We use these to decide which rules we need
1311 // to report after a DW_CFA_restore_state instruction.
1312 virtual bool operator==(const Rule &rhs) const = 0;
1313
operator !=(const Rule & rhs) const1314 bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
1315
1316 // Return a pointer to a copy of this rule.
1317 virtual Rule *Copy() const = 0;
1318
1319 // If this is a base+offset rule, change its base register to REG.
1320 // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
SetBaseRegister(unsigned reg)1321 virtual void SetBaseRegister(unsigned reg) { }
1322
1323 // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
1324 // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
SetOffset(long long offset)1325 virtual void SetOffset(long long offset) { }
1326 };
1327
1328 // Rule: the value the register had in the caller cannot be recovered.
1329 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
1330 public:
UndefinedRule()1331 UndefinedRule() { }
~UndefinedRule()1332 ~UndefinedRule() { }
Handle(Handler * handler,uint64 address,int reg) const1333 bool Handle(Handler *handler, uint64 address, int reg) const {
1334 return handler->UndefinedRule(address, reg);
1335 }
operator ==(const Rule & rhs) const1336 bool operator==(const Rule &rhs) const {
1337 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1338 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1339 const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs);
1340 return (our_rhs != NULL);
1341 }
Copy() const1342 Rule *Copy() const { return new UndefinedRule(*this); }
1343 };
1344
1345 // Rule: the register's value is the same as that it had in the caller.
1346 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
1347 public:
SameValueRule()1348 SameValueRule() { }
~SameValueRule()1349 ~SameValueRule() { }
Handle(Handler * handler,uint64 address,int reg) const1350 bool Handle(Handler *handler, uint64 address, int reg) const {
1351 return handler->SameValueRule(address, reg);
1352 }
operator ==(const Rule & rhs) const1353 bool operator==(const Rule &rhs) const {
1354 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1355 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1356 const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs);
1357 return (our_rhs != NULL);
1358 }
Copy() const1359 Rule *Copy() const { return new SameValueRule(*this); }
1360 };
1361
1362 // Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
1363 // may be CallFrameInfo::Handler::kCFARegister.
1364 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
1365 public:
OffsetRule(int base_register,long offset)1366 OffsetRule(int base_register, long offset)
1367 : base_register_(base_register), offset_(offset) { }
~OffsetRule()1368 ~OffsetRule() { }
Handle(Handler * handler,uint64 address,int reg) const1369 bool Handle(Handler *handler, uint64 address, int reg) const {
1370 return handler->OffsetRule(address, reg, base_register_, offset_);
1371 }
operator ==(const Rule & rhs) const1372 bool operator==(const Rule &rhs) const {
1373 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1374 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1375 const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs);
1376 return (our_rhs &&
1377 base_register_ == our_rhs->base_register_ &&
1378 offset_ == our_rhs->offset_);
1379 }
Copy() const1380 Rule *Copy() const { return new OffsetRule(*this); }
1381 // We don't actually need SetBaseRegister or SetOffset here, since they
1382 // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
1383 // doesn't make sense to use OffsetRule for computing the CFA: it
1384 // computes the address at which a register is saved, not a value.
1385 private:
1386 int base_register_;
1387 long offset_;
1388 };
1389
1390 // Rule: the value the register had in the caller is the value of
1391 // BASE_REGISTER plus offset. BASE_REGISTER may be
1392 // CallFrameInfo::Handler::kCFARegister.
1393 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
1394 public:
ValOffsetRule(int base_register,long offset)1395 ValOffsetRule(int base_register, long offset)
1396 : base_register_(base_register), offset_(offset) { }
~ValOffsetRule()1397 ~ValOffsetRule() { }
Handle(Handler * handler,uint64 address,int reg) const1398 bool Handle(Handler *handler, uint64 address, int reg) const {
1399 return handler->ValOffsetRule(address, reg, base_register_, offset_);
1400 }
operator ==(const Rule & rhs) const1401 bool operator==(const Rule &rhs) const {
1402 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1403 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1404 const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs);
1405 return (our_rhs &&
1406 base_register_ == our_rhs->base_register_ &&
1407 offset_ == our_rhs->offset_);
1408 }
Copy() const1409 Rule *Copy() const { return new ValOffsetRule(*this); }
SetBaseRegister(unsigned reg)1410 void SetBaseRegister(unsigned reg) { base_register_ = reg; }
SetOffset(long long offset)1411 void SetOffset(long long offset) { offset_ = offset; }
1412 private:
1413 int base_register_;
1414 long offset_;
1415 };
1416
1417 // Rule: the register has been saved in another register REGISTER_NUMBER_.
1418 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
1419 public:
RegisterRule(int register_number)1420 explicit RegisterRule(int register_number)
1421 : register_number_(register_number) { }
~RegisterRule()1422 ~RegisterRule() { }
Handle(Handler * handler,uint64 address,int reg) const1423 bool Handle(Handler *handler, uint64 address, int reg) const {
1424 return handler->RegisterRule(address, reg, register_number_);
1425 }
operator ==(const Rule & rhs) const1426 bool operator==(const Rule &rhs) const {
1427 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1428 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1429 const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs);
1430 return (our_rhs && register_number_ == our_rhs->register_number_);
1431 }
Copy() const1432 Rule *Copy() const { return new RegisterRule(*this); }
1433 private:
1434 int register_number_;
1435 };
1436
1437 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1438 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
1439 public:
ExpressionRule(const string & expression)1440 explicit ExpressionRule(const string &expression)
1441 : expression_(expression) { }
~ExpressionRule()1442 ~ExpressionRule() { }
Handle(Handler * handler,uint64 address,int reg) const1443 bool Handle(Handler *handler, uint64 address, int reg) const {
1444 return handler->ExpressionRule(address, reg, expression_);
1445 }
operator ==(const Rule & rhs) const1446 bool operator==(const Rule &rhs) const {
1447 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1448 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1449 const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs);
1450 return (our_rhs && expression_ == our_rhs->expression_);
1451 }
Copy() const1452 Rule *Copy() const { return new ExpressionRule(*this); }
1453 private:
1454 string expression_;
1455 };
1456
1457 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1458 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
1459 public:
ValExpressionRule(const string & expression)1460 explicit ValExpressionRule(const string &expression)
1461 : expression_(expression) { }
~ValExpressionRule()1462 ~ValExpressionRule() { }
Handle(Handler * handler,uint64 address,int reg) const1463 bool Handle(Handler *handler, uint64 address, int reg) const {
1464 return handler->ValExpressionRule(address, reg, expression_);
1465 }
operator ==(const Rule & rhs) const1466 bool operator==(const Rule &rhs) const {
1467 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1468 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1469 const ValExpressionRule *our_rhs =
1470 dynamic_cast<const ValExpressionRule *>(&rhs);
1471 return (our_rhs && expression_ == our_rhs->expression_);
1472 }
Copy() const1473 Rule *Copy() const { return new ValExpressionRule(*this); }
1474 private:
1475 string expression_;
1476 };
1477
1478 // A map from register numbers to rules.
1479 class CallFrameInfo::RuleMap {
1480 public:
RuleMap()1481 RuleMap() : cfa_rule_(NULL) { }
RuleMap(const RuleMap & rhs)1482 RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; }
~RuleMap()1483 ~RuleMap() { Clear(); }
1484
1485 RuleMap &operator=(const RuleMap &rhs);
1486
1487 // Set the rule for computing the CFA to RULE. Take ownership of RULE.
SetCFARule(Rule * rule)1488 void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; }
1489
1490 // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
1491 // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
1492 // DW_CFA_def_cfa_register, and for detecting references to the CFA before
1493 // a rule for it has been established.
CFARule() const1494 Rule *CFARule() const { return cfa_rule_; }
1495
1496 // Return the rule for REG, or NULL if there is none. The caller takes
1497 // ownership of the result.
1498 Rule *RegisterRule(int reg) const;
1499
1500 // Set the rule for computing REG to RULE. Take ownership of RULE.
1501 void SetRegisterRule(int reg, Rule *rule);
1502
1503 // Make all the appropriate calls to HANDLER as if we were changing from
1504 // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
1505 // DW_CFA_restore_state, where lots of rules can change simultaneously.
1506 // Return true if all handlers returned true; otherwise, return false.
1507 bool HandleTransitionTo(Handler *handler, uint64 address,
1508 const RuleMap &new_rules) const;
1509
1510 private:
1511 // A map from register numbers to Rules.
1512 typedef std::map<int, Rule *> RuleByNumber;
1513
1514 // Remove all register rules and clear cfa_rule_.
1515 void Clear();
1516
1517 // The rule for computing the canonical frame address. This RuleMap owns
1518 // this rule.
1519 Rule *cfa_rule_;
1520
1521 // A map from register numbers to postfix expressions to recover
1522 // their values. This RuleMap owns the Rules the map refers to.
1523 RuleByNumber registers_;
1524 };
1525
operator =(const RuleMap & rhs)1526 CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
1527 Clear();
1528 // Since each map owns the rules it refers to, assignment must copy them.
1529 if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
1530 for (RuleByNumber::const_iterator it = rhs.registers_.begin();
1531 it != rhs.registers_.end(); it++)
1532 registers_[it->first] = it->second->Copy();
1533 return *this;
1534 }
1535
RegisterRule(int reg) const1536 CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
1537 assert(reg != Handler::kCFARegister);
1538 RuleByNumber::const_iterator it = registers_.find(reg);
1539 if (it != registers_.end())
1540 return it->second->Copy();
1541 else
1542 return NULL;
1543 }
1544
SetRegisterRule(int reg,Rule * rule)1545 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
1546 assert(reg != Handler::kCFARegister);
1547 assert(rule);
1548 Rule **slot = ®isters_[reg];
1549 delete *slot;
1550 *slot = rule;
1551 }
1552
HandleTransitionTo(Handler * handler,uint64 address,const RuleMap & new_rules) const1553 bool CallFrameInfo::RuleMap::HandleTransitionTo(
1554 Handler *handler,
1555 uint64 address,
1556 const RuleMap &new_rules) const {
1557 // Transition from cfa_rule_ to new_rules.cfa_rule_.
1558 if (cfa_rule_ && new_rules.cfa_rule_) {
1559 if (*cfa_rule_ != *new_rules.cfa_rule_ &&
1560 !new_rules.cfa_rule_->Handle(handler, address,
1561 Handler::kCFARegister))
1562 return false;
1563 } else if (cfa_rule_) {
1564 // this RuleMap has a CFA rule but new_rules doesn't.
1565 // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
1566 // it's garbage input. The instruction interpreter should have
1567 // detected this and warned, so take no action here.
1568 } else if (new_rules.cfa_rule_) {
1569 // This shouldn't be possible: NEW_RULES is some prior state, and
1570 // there's no way to remove entries.
1571 assert(0);
1572 } else {
1573 // Both CFA rules are empty. No action needed.
1574 }
1575
1576 // Traverse the two maps in order by register number, and report
1577 // whatever differences we find.
1578 RuleByNumber::const_iterator old_it = registers_.begin();
1579 RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
1580 while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
1581 if (old_it->first < new_it->first) {
1582 // This RuleMap has an entry for old_it->first, but NEW_RULES
1583 // doesn't.
1584 //
1585 // This isn't really the right thing to do, but since CFI generally
1586 // only mentions callee-saves registers, and GCC's convention for
1587 // callee-saves registers is that they are unchanged, it's a good
1588 // approximation.
1589 if (!handler->SameValueRule(address, old_it->first))
1590 return false;
1591 old_it++;
1592 } else if (old_it->first > new_it->first) {
1593 // NEW_RULES has entry for new_it->first, but this RuleMap
1594 // doesn't. This shouldn't be possible: NEW_RULES is some prior
1595 // state, and there's no way to remove entries.
1596 assert(0);
1597 } else {
1598 // Both maps have an entry for this register. Report the new
1599 // rule if it is different.
1600 if (*old_it->second != *new_it->second &&
1601 !new_it->second->Handle(handler, address, new_it->first))
1602 return false;
1603 new_it++, old_it++;
1604 }
1605 }
1606 // Finish off entries from this RuleMap with no counterparts in new_rules.
1607 while (old_it != registers_.end()) {
1608 if (!handler->SameValueRule(address, old_it->first))
1609 return false;
1610 old_it++;
1611 }
1612 // Since we only make transitions from a rule set to some previously
1613 // saved rule set, and we can only add rules to the map, NEW_RULES
1614 // must have fewer rules than *this.
1615 assert(new_it == new_rules.registers_.end());
1616
1617 return true;
1618 }
1619
1620 // Remove all register rules and clear cfa_rule_.
Clear()1621 void CallFrameInfo::RuleMap::Clear() {
1622 delete cfa_rule_;
1623 cfa_rule_ = NULL;
1624 for (RuleByNumber::iterator it = registers_.begin();
1625 it != registers_.end(); it++)
1626 delete it->second;
1627 registers_.clear();
1628 }
1629
1630 // The state of the call frame information interpreter as it processes
1631 // instructions from a CIE and FDE.
1632 class CallFrameInfo::State {
1633 public:
1634 // Create a call frame information interpreter state with the given
1635 // reporter, reader, handler, and initial call frame info address.
State(ByteReader * reader,Handler * handler,Reporter * reporter,uint64 address)1636 State(ByteReader *reader, Handler *handler, Reporter *reporter,
1637 uint64 address)
1638 : reader_(reader), handler_(handler), reporter_(reporter),
1639 address_(address), entry_(NULL), cursor_(NULL) { }
1640
1641 // Interpret instructions from CIE, save the resulting rule set for
1642 // DW_CFA_restore instructions, and return true. On error, report
1643 // the problem to reporter_ and return false.
1644 bool InterpretCIE(const CIE &cie);
1645
1646 // Interpret instructions from FDE, and return true. On error,
1647 // report the problem to reporter_ and return false.
1648 bool InterpretFDE(const FDE &fde);
1649
1650 private:
1651 // The operands of a CFI instruction, for ParseOperands.
1652 struct Operands {
1653 unsigned register_number; // A register number.
1654 uint64 offset; // An offset or address.
1655 long signed_offset; // A signed offset.
1656 string expression; // A DWARF expression.
1657 };
1658
1659 // Parse CFI instruction operands from STATE's instruction stream as
1660 // described by FORMAT. On success, populate OPERANDS with the
1661 // results, and return true. On failure, report the problem and
1662 // return false.
1663 //
1664 // Each character of FORMAT should be one of the following:
1665 //
1666 // 'r' unsigned LEB128 register number (OPERANDS->register_number)
1667 // 'o' unsigned LEB128 offset (OPERANDS->offset)
1668 // 's' signed LEB128 offset (OPERANDS->signed_offset)
1669 // 'a' machine-size address (OPERANDS->offset)
1670 // (If the CIE has a 'z' augmentation string, 'a' uses the
1671 // encoding specified by the 'R' argument.)
1672 // '1' a one-byte offset (OPERANDS->offset)
1673 // '2' a two-byte offset (OPERANDS->offset)
1674 // '4' a four-byte offset (OPERANDS->offset)
1675 // '8' an eight-byte offset (OPERANDS->offset)
1676 // 'e' a DW_FORM_block holding a (OPERANDS->expression)
1677 // DWARF expression
1678 bool ParseOperands(const char *format, Operands *operands);
1679
1680 // Interpret one CFI instruction from STATE's instruction stream, update
1681 // STATE, report any rule changes to handler_, and return true. On
1682 // failure, report the problem and return false.
1683 bool DoInstruction();
1684
1685 // The following Do* member functions are subroutines of DoInstruction,
1686 // factoring out the actual work of operations that have several
1687 // different encodings.
1688
1689 // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
1690 // return true. On failure, report and return false. (Used for
1691 // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
1692 bool DoDefCFA(unsigned base_register, long offset);
1693
1694 // Change the offset of the CFA rule to OFFSET, and return true. On
1695 // failure, report and return false. (Subroutine for
1696 // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
1697 bool DoDefCFAOffset(long offset);
1698
1699 // Specify that REG can be recovered using RULE, and return true. On
1700 // failure, report and return false.
1701 bool DoRule(unsigned reg, Rule *rule);
1702
1703 // Specify that REG can be found at OFFSET from the CFA, and return true.
1704 // On failure, report and return false. (Subroutine for DW_CFA_offset,
1705 // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
1706 bool DoOffset(unsigned reg, long offset);
1707
1708 // Specify that the caller's value for REG is the CFA plus OFFSET,
1709 // and return true. On failure, report and return false. (Subroutine
1710 // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
1711 bool DoValOffset(unsigned reg, long offset);
1712
1713 // Restore REG to the rule established in the CIE, and return true. On
1714 // failure, report and return false. (Subroutine for DW_CFA_restore and
1715 // DW_CFA_restore_extended.)
1716 bool DoRestore(unsigned reg);
1717
1718 // Return the section offset of the instruction at cursor. For use
1719 // in error messages.
CursorOffset()1720 uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
1721
1722 // Report that entry_ is incomplete, and return false. For brevity.
ReportIncomplete()1723 bool ReportIncomplete() {
1724 reporter_->Incomplete(entry_->offset, entry_->kind);
1725 return false;
1726 }
1727
1728 // For reading multi-byte values with the appropriate endianness.
1729 ByteReader *reader_;
1730
1731 // The handler to which we should report the data we find.
1732 Handler *handler_;
1733
1734 // For reporting problems in the info we're parsing.
1735 Reporter *reporter_;
1736
1737 // The code address to which the next instruction in the stream applies.
1738 uint64 address_;
1739
1740 // The entry whose instructions we are currently processing. This is
1741 // first a CIE, and then an FDE.
1742 const Entry *entry_;
1743
1744 // The next instruction to process.
1745 const uint8_t *cursor_;
1746
1747 // The current set of rules.
1748 RuleMap rules_;
1749
1750 // The set of rules established by the CIE, used by DW_CFA_restore
1751 // and DW_CFA_restore_extended. We set this after interpreting the
1752 // CIE's instructions.
1753 RuleMap cie_rules_;
1754
1755 // A stack of saved states, for DW_CFA_remember_state and
1756 // DW_CFA_restore_state.
1757 std::stack<RuleMap> saved_rules_;
1758 };
1759
InterpretCIE(const CIE & cie)1760 bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
1761 entry_ = &cie;
1762 cursor_ = entry_->instructions;
1763 while (cursor_ < entry_->end)
1764 if (!DoInstruction())
1765 return false;
1766 // Note the rules established by the CIE, for use by DW_CFA_restore
1767 // and DW_CFA_restore_extended.
1768 cie_rules_ = rules_;
1769 return true;
1770 }
1771
InterpretFDE(const FDE & fde)1772 bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
1773 entry_ = &fde;
1774 cursor_ = entry_->instructions;
1775 while (cursor_ < entry_->end)
1776 if (!DoInstruction())
1777 return false;
1778 return true;
1779 }
1780
ParseOperands(const char * format,Operands * operands)1781 bool CallFrameInfo::State::ParseOperands(const char *format,
1782 Operands *operands) {
1783 size_t len;
1784 const char *operand;
1785
1786 for (operand = format; *operand; operand++) {
1787 size_t bytes_left = entry_->end - cursor_;
1788 switch (*operand) {
1789 case 'r':
1790 operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
1791 if (len > bytes_left) return ReportIncomplete();
1792 cursor_ += len;
1793 break;
1794
1795 case 'o':
1796 operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
1797 if (len > bytes_left) return ReportIncomplete();
1798 cursor_ += len;
1799 break;
1800
1801 case 's':
1802 operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
1803 if (len > bytes_left) return ReportIncomplete();
1804 cursor_ += len;
1805 break;
1806
1807 case 'a':
1808 operands->offset =
1809 reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
1810 &len);
1811 if (len > bytes_left) return ReportIncomplete();
1812 cursor_ += len;
1813 break;
1814
1815 case '1':
1816 if (1 > bytes_left) return ReportIncomplete();
1817 operands->offset = static_cast<unsigned char>(*cursor_++);
1818 break;
1819
1820 case '2':
1821 if (2 > bytes_left) return ReportIncomplete();
1822 operands->offset = reader_->ReadTwoBytes(cursor_);
1823 cursor_ += 2;
1824 break;
1825
1826 case '4':
1827 if (4 > bytes_left) return ReportIncomplete();
1828 operands->offset = reader_->ReadFourBytes(cursor_);
1829 cursor_ += 4;
1830 break;
1831
1832 case '8':
1833 if (8 > bytes_left) return ReportIncomplete();
1834 operands->offset = reader_->ReadEightBytes(cursor_);
1835 cursor_ += 8;
1836 break;
1837
1838 case 'e': {
1839 size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
1840 if (len > bytes_left || expression_length > bytes_left - len)
1841 return ReportIncomplete();
1842 cursor_ += len;
1843 operands->expression = string(reinterpret_cast<const char *>(cursor_),
1844 expression_length);
1845 cursor_ += expression_length;
1846 break;
1847 }
1848
1849 default:
1850 assert(0);
1851 }
1852 }
1853
1854 return true;
1855 }
1856
DoInstruction()1857 bool CallFrameInfo::State::DoInstruction() {
1858 CIE *cie = entry_->cie;
1859 Operands ops;
1860
1861 // Our entry's kind should have been set by now.
1862 assert(entry_->kind != kUnknown);
1863
1864 // We shouldn't have been invoked unless there were more
1865 // instructions to parse.
1866 assert(cursor_ < entry_->end);
1867
1868 unsigned opcode = *cursor_++;
1869 if ((opcode & 0xc0) != 0) {
1870 switch (opcode & 0xc0) {
1871 // Advance the address.
1872 case DW_CFA_advance_loc: {
1873 size_t code_offset = opcode & 0x3f;
1874 address_ += code_offset * cie->code_alignment_factor;
1875 break;
1876 }
1877
1878 // Find a register at an offset from the CFA.
1879 case DW_CFA_offset:
1880 if (!ParseOperands("o", &ops) ||
1881 !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
1882 return false;
1883 break;
1884
1885 // Restore the rule established for a register by the CIE.
1886 case DW_CFA_restore:
1887 if (!DoRestore(opcode & 0x3f)) return false;
1888 break;
1889
1890 // The 'if' above should have excluded this possibility.
1891 default:
1892 assert(0);
1893 }
1894
1895 // Return here, so the big switch below won't be indented.
1896 return true;
1897 }
1898
1899 switch (opcode) {
1900 // Set the address.
1901 case DW_CFA_set_loc:
1902 if (!ParseOperands("a", &ops)) return false;
1903 address_ = ops.offset;
1904 break;
1905
1906 // Advance the address.
1907 case DW_CFA_advance_loc1:
1908 if (!ParseOperands("1", &ops)) return false;
1909 address_ += ops.offset * cie->code_alignment_factor;
1910 break;
1911
1912 // Advance the address.
1913 case DW_CFA_advance_loc2:
1914 if (!ParseOperands("2", &ops)) return false;
1915 address_ += ops.offset * cie->code_alignment_factor;
1916 break;
1917
1918 // Advance the address.
1919 case DW_CFA_advance_loc4:
1920 if (!ParseOperands("4", &ops)) return false;
1921 address_ += ops.offset * cie->code_alignment_factor;
1922 break;
1923
1924 // Advance the address.
1925 case DW_CFA_MIPS_advance_loc8:
1926 if (!ParseOperands("8", &ops)) return false;
1927 address_ += ops.offset * cie->code_alignment_factor;
1928 break;
1929
1930 // Compute the CFA by adding an offset to a register.
1931 case DW_CFA_def_cfa:
1932 if (!ParseOperands("ro", &ops) ||
1933 !DoDefCFA(ops.register_number, ops.offset))
1934 return false;
1935 break;
1936
1937 // Compute the CFA by adding an offset to a register.
1938 case DW_CFA_def_cfa_sf:
1939 if (!ParseOperands("rs", &ops) ||
1940 !DoDefCFA(ops.register_number,
1941 ops.signed_offset * cie->data_alignment_factor))
1942 return false;
1943 break;
1944
1945 // Change the base register used to compute the CFA.
1946 case DW_CFA_def_cfa_register: {
1947 if (!ParseOperands("r", &ops)) return false;
1948 Rule *cfa_rule = rules_.CFARule();
1949 if (!cfa_rule) {
1950 if (!DoDefCFA(ops.register_number, ops.offset)) {
1951 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1952 return false;
1953 }
1954 } else {
1955 cfa_rule->SetBaseRegister(ops.register_number);
1956 if (!cfa_rule->Handle(handler_, address_,
1957 Handler::kCFARegister))
1958 return false;
1959 }
1960 break;
1961 }
1962
1963 // Change the offset used to compute the CFA.
1964 case DW_CFA_def_cfa_offset:
1965 if (!ParseOperands("o", &ops) ||
1966 !DoDefCFAOffset(ops.offset))
1967 return false;
1968 break;
1969
1970 // Change the offset used to compute the CFA.
1971 case DW_CFA_def_cfa_offset_sf:
1972 if (!ParseOperands("s", &ops) ||
1973 !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
1974 return false;
1975 break;
1976
1977 // Specify an expression whose value is the CFA.
1978 case DW_CFA_def_cfa_expression: {
1979 if (!ParseOperands("e", &ops))
1980 return false;
1981 Rule *rule = new ValExpressionRule(ops.expression);
1982 rules_.SetCFARule(rule);
1983 if (!rule->Handle(handler_, address_,
1984 Handler::kCFARegister))
1985 return false;
1986 break;
1987 }
1988
1989 // The register's value cannot be recovered.
1990 case DW_CFA_undefined: {
1991 if (!ParseOperands("r", &ops) ||
1992 !DoRule(ops.register_number, new UndefinedRule()))
1993 return false;
1994 break;
1995 }
1996
1997 // The register's value is unchanged from its value in the caller.
1998 case DW_CFA_same_value: {
1999 if (!ParseOperands("r", &ops) ||
2000 !DoRule(ops.register_number, new SameValueRule()))
2001 return false;
2002 break;
2003 }
2004
2005 // Find a register at an offset from the CFA.
2006 case DW_CFA_offset_extended:
2007 if (!ParseOperands("ro", &ops) ||
2008 !DoOffset(ops.register_number,
2009 ops.offset * cie->data_alignment_factor))
2010 return false;
2011 break;
2012
2013 // The register is saved at an offset from the CFA.
2014 case DW_CFA_offset_extended_sf:
2015 if (!ParseOperands("rs", &ops) ||
2016 !DoOffset(ops.register_number,
2017 ops.signed_offset * cie->data_alignment_factor))
2018 return false;
2019 break;
2020
2021 // The register is saved at an offset from the CFA.
2022 case DW_CFA_GNU_negative_offset_extended:
2023 if (!ParseOperands("ro", &ops) ||
2024 !DoOffset(ops.register_number,
2025 -ops.offset * cie->data_alignment_factor))
2026 return false;
2027 break;
2028
2029 // The register's value is the sum of the CFA plus an offset.
2030 case DW_CFA_val_offset:
2031 if (!ParseOperands("ro", &ops) ||
2032 !DoValOffset(ops.register_number,
2033 ops.offset * cie->data_alignment_factor))
2034 return false;
2035 break;
2036
2037 // The register's value is the sum of the CFA plus an offset.
2038 case DW_CFA_val_offset_sf:
2039 if (!ParseOperands("rs", &ops) ||
2040 !DoValOffset(ops.register_number,
2041 ops.signed_offset * cie->data_alignment_factor))
2042 return false;
2043 break;
2044
2045 // The register has been saved in another register.
2046 case DW_CFA_register: {
2047 if (!ParseOperands("ro", &ops) ||
2048 !DoRule(ops.register_number, new RegisterRule(ops.offset)))
2049 return false;
2050 break;
2051 }
2052
2053 // An expression yields the address at which the register is saved.
2054 case DW_CFA_expression: {
2055 if (!ParseOperands("re", &ops) ||
2056 !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
2057 return false;
2058 break;
2059 }
2060
2061 // An expression yields the caller's value for the register.
2062 case DW_CFA_val_expression: {
2063 if (!ParseOperands("re", &ops) ||
2064 !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
2065 return false;
2066 break;
2067 }
2068
2069 // Restore the rule established for a register by the CIE.
2070 case DW_CFA_restore_extended:
2071 if (!ParseOperands("r", &ops) ||
2072 !DoRestore( ops.register_number))
2073 return false;
2074 break;
2075
2076 // Save the current set of rules on a stack.
2077 case DW_CFA_remember_state:
2078 saved_rules_.push(rules_);
2079 break;
2080
2081 // Pop the current set of rules off the stack.
2082 case DW_CFA_restore_state: {
2083 if (saved_rules_.empty()) {
2084 reporter_->EmptyStateStack(entry_->offset, entry_->kind,
2085 CursorOffset());
2086 return false;
2087 }
2088 const RuleMap &new_rules = saved_rules_.top();
2089 if (rules_.CFARule() && !new_rules.CFARule()) {
2090 reporter_->ClearingCFARule(entry_->offset, entry_->kind,
2091 CursorOffset());
2092 return false;
2093 }
2094 rules_.HandleTransitionTo(handler_, address_, new_rules);
2095 rules_ = new_rules;
2096 saved_rules_.pop();
2097 break;
2098 }
2099
2100 // No operation. (Padding instruction.)
2101 case DW_CFA_nop:
2102 break;
2103
2104 // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
2105 // are saved in registers 24 through 31 (%i0-%i7), and registers
2106 // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
2107 // (0-15 * the register size). The register numbers must be
2108 // hard-coded. A GNU extension, and not a pretty one.
2109 case DW_CFA_GNU_window_save: {
2110 // Save %o0-%o7 in %i0-%i7.
2111 for (int i = 8; i < 16; i++)
2112 if (!DoRule(i, new RegisterRule(i + 16)))
2113 return false;
2114 // Save %l0-%l7 and %i0-%i7 at the CFA.
2115 for (int i = 16; i < 32; i++)
2116 // Assume that the byte reader's address size is the same as
2117 // the architecture's register size. !@#%*^ hilarious.
2118 if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
2119 (i - 16) * reader_->AddressSize())))
2120 return false;
2121 break;
2122 }
2123
2124 // I'm not sure what this is. GDB doesn't use it for unwinding.
2125 case DW_CFA_GNU_args_size:
2126 if (!ParseOperands("o", &ops)) return false;
2127 break;
2128
2129 // An opcode we don't recognize.
2130 default: {
2131 reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
2132 return false;
2133 }
2134 }
2135
2136 return true;
2137 }
2138
DoDefCFA(unsigned base_register,long offset)2139 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
2140 Rule *rule = new ValOffsetRule(base_register, offset);
2141 rules_.SetCFARule(rule);
2142 return rule->Handle(handler_, address_,
2143 Handler::kCFARegister);
2144 }
2145
DoDefCFAOffset(long offset)2146 bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
2147 Rule *cfa_rule = rules_.CFARule();
2148 if (!cfa_rule) {
2149 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2150 return false;
2151 }
2152 cfa_rule->SetOffset(offset);
2153 return cfa_rule->Handle(handler_, address_,
2154 Handler::kCFARegister);
2155 }
2156
DoRule(unsigned reg,Rule * rule)2157 bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
2158 rules_.SetRegisterRule(reg, rule);
2159 return rule->Handle(handler_, address_, reg);
2160 }
2161
DoOffset(unsigned reg,long offset)2162 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
2163 if (!rules_.CFARule()) {
2164 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2165 return false;
2166 }
2167 return DoRule(reg,
2168 new OffsetRule(Handler::kCFARegister, offset));
2169 }
2170
DoValOffset(unsigned reg,long offset)2171 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
2172 if (!rules_.CFARule()) {
2173 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2174 return false;
2175 }
2176 return DoRule(reg,
2177 new ValOffsetRule(Handler::kCFARegister, offset));
2178 }
2179
DoRestore(unsigned reg)2180 bool CallFrameInfo::State::DoRestore(unsigned reg) {
2181 // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
2182 if (entry_->kind == kCIE) {
2183 reporter_->RestoreInCIE(entry_->offset, CursorOffset());
2184 return false;
2185 }
2186 Rule *rule = cie_rules_.RegisterRule(reg);
2187 if (!rule) {
2188 // This isn't really the right thing to do, but since CFI generally
2189 // only mentions callee-saves registers, and GCC's convention for
2190 // callee-saves registers is that they are unchanged, it's a good
2191 // approximation.
2192 rule = new SameValueRule();
2193 }
2194 return DoRule(reg, rule);
2195 }
2196
ReadEntryPrologue(const uint8_t * cursor,Entry * entry)2197 bool CallFrameInfo::ReadEntryPrologue(const uint8_t *cursor, Entry *entry) {
2198 const uint8_t *buffer_end = buffer_ + buffer_length_;
2199
2200 // Initialize enough of ENTRY for use in error reporting.
2201 entry->offset = cursor - buffer_;
2202 entry->start = cursor;
2203 entry->kind = kUnknown;
2204 entry->end = NULL;
2205
2206 // Read the initial length. This sets reader_'s offset size.
2207 size_t length_size;
2208 uint64 length = reader_->ReadInitialLength(cursor, &length_size);
2209 if (length_size > size_t(buffer_end - cursor))
2210 return ReportIncomplete(entry);
2211 cursor += length_size;
2212
2213 // In a .eh_frame section, a length of zero marks the end of the series
2214 // of entries.
2215 if (length == 0 && eh_frame_) {
2216 entry->kind = kTerminator;
2217 entry->end = cursor;
2218 return true;
2219 }
2220
2221 // Validate the length.
2222 if (length > size_t(buffer_end - cursor))
2223 return ReportIncomplete(entry);
2224
2225 // The length is the number of bytes after the initial length field;
2226 // we have that position handy at this point, so compute the end
2227 // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
2228 // and the length didn't fit in a size_t, we would have rejected it
2229 // above.)
2230 entry->end = cursor + length;
2231
2232 // Parse the next field: either the offset of a CIE or a CIE id.
2233 size_t offset_size = reader_->OffsetSize();
2234 if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
2235 entry->id = reader_->ReadOffset(cursor);
2236
2237 // Don't advance cursor past id field yet; in .eh_frame data we need
2238 // the id's position to compute the section offset of an FDE's CIE.
2239
2240 // Now we can decide what kind of entry this is.
2241 if (eh_frame_) {
2242 // In .eh_frame data, an ID of zero marks the entry as a CIE, and
2243 // anything else is an offset from the id field of the FDE to the start
2244 // of the CIE.
2245 if (entry->id == 0) {
2246 entry->kind = kCIE;
2247 } else {
2248 entry->kind = kFDE;
2249 // Turn the offset from the id into an offset from the buffer's start.
2250 entry->id = (cursor - buffer_) - entry->id;
2251 }
2252 } else {
2253 // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
2254 // offset size for the entry) marks the entry as a CIE, and anything
2255 // else is the offset of the CIE from the beginning of the section.
2256 if (offset_size == 4)
2257 entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
2258 else {
2259 assert(offset_size == 8);
2260 entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
2261 }
2262 }
2263
2264 // Now advance cursor past the id.
2265 cursor += offset_size;
2266
2267 // The fields specific to this kind of entry start here.
2268 entry->fields = cursor;
2269
2270 entry->cie = NULL;
2271
2272 return true;
2273 }
2274
ReadCIEFields(CIE * cie)2275 bool CallFrameInfo::ReadCIEFields(CIE *cie) {
2276 const uint8_t *cursor = cie->fields;
2277 size_t len;
2278
2279 assert(cie->kind == kCIE);
2280
2281 // Prepare for early exit.
2282 cie->version = 0;
2283 cie->augmentation.clear();
2284 cie->code_alignment_factor = 0;
2285 cie->data_alignment_factor = 0;
2286 cie->return_address_register = 0;
2287 cie->has_z_augmentation = false;
2288 cie->pointer_encoding = DW_EH_PE_absptr;
2289 cie->instructions = 0;
2290
2291 // Parse the version number.
2292 if (cie->end - cursor < 1)
2293 return ReportIncomplete(cie);
2294 cie->version = reader_->ReadOneByte(cursor);
2295 cursor++;
2296
2297 // If we don't recognize the version, we can't parse any more fields of the
2298 // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
2299 // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
2300 // the difference between those versions seems to be the same as for
2301 // .debug_frame.
2302 if (cie->version < 1 || cie->version > 4) {
2303 reporter_->UnrecognizedVersion(cie->offset, cie->version);
2304 return false;
2305 }
2306
2307 const uint8_t *augmentation_start = cursor;
2308 const uint8_t *augmentation_end =
2309 reinterpret_cast<const uint8_t *>(memchr(augmentation_start, '\0',
2310 cie->end - augmentation_start));
2311 if (! augmentation_end) return ReportIncomplete(cie);
2312 cursor = augmentation_end;
2313 cie->augmentation = string(reinterpret_cast<const char *>(augmentation_start),
2314 cursor - augmentation_start);
2315 // Skip the terminating '\0'.
2316 cursor++;
2317
2318 // Is this CFI augmented?
2319 if (!cie->augmentation.empty()) {
2320 // Is it an augmentation we recognize?
2321 if (cie->augmentation[0] == DW_Z_augmentation_start) {
2322 // Linux C++ ABI 'z' augmentation, used for exception handling data.
2323 cie->has_z_augmentation = true;
2324 } else {
2325 // Not an augmentation we recognize. Augmentations can have arbitrary
2326 // effects on the form of rest of the content, so we have to give up.
2327 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2328 return false;
2329 }
2330 }
2331
2332 if (cie->version >= 4) {
2333 cie->address_size = *cursor++;
2334 if (cie->address_size != 8 && cie->address_size != 4) {
2335 reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
2336 return false;
2337 }
2338
2339 cie->segment_size = *cursor++;
2340 if (cie->segment_size != 0) {
2341 reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
2342 return false;
2343 }
2344 }
2345
2346 // Parse the code alignment factor.
2347 cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
2348 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2349 cursor += len;
2350
2351 // Parse the data alignment factor.
2352 cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
2353 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2354 cursor += len;
2355
2356 // Parse the return address register. This is a ubyte in version 1, and
2357 // a ULEB128 in version 3.
2358 if (cie->version == 1) {
2359 if (cursor >= cie->end) return ReportIncomplete(cie);
2360 cie->return_address_register = uint8(*cursor++);
2361 } else {
2362 cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
2363 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2364 cursor += len;
2365 }
2366
2367 // If we have a 'z' augmentation string, find the augmentation data and
2368 // use the augmentation string to parse it.
2369 if (cie->has_z_augmentation) {
2370 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
2371 if (size_t(cie->end - cursor) < len + data_size)
2372 return ReportIncomplete(cie);
2373 cursor += len;
2374 const uint8_t *data = cursor;
2375 cursor += data_size;
2376 const uint8_t *data_end = cursor;
2377
2378 cie->has_z_lsda = false;
2379 cie->has_z_personality = false;
2380 cie->has_z_signal_frame = false;
2381
2382 // Walk the augmentation string, and extract values from the
2383 // augmentation data as the string directs.
2384 for (size_t i = 1; i < cie->augmentation.size(); i++) {
2385 switch (cie->augmentation[i]) {
2386 case DW_Z_has_LSDA:
2387 // The CIE's augmentation data holds the language-specific data
2388 // area pointer's encoding, and the FDE's augmentation data holds
2389 // the pointer itself.
2390 cie->has_z_lsda = true;
2391 // Fetch the LSDA encoding from the augmentation data.
2392 if (data >= data_end) return ReportIncomplete(cie);
2393 cie->lsda_encoding = DwarfPointerEncoding(*data++);
2394 if (!reader_->ValidEncoding(cie->lsda_encoding)) {
2395 reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
2396 return false;
2397 }
2398 // Don't check if the encoding is usable here --- we haven't
2399 // read the FDE's fields yet, so we're not prepared for
2400 // DW_EH_PE_funcrel, although that's a fine encoding for the
2401 // LSDA to use, since it appears in the FDE.
2402 break;
2403
2404 case DW_Z_has_personality_routine:
2405 // The CIE's augmentation data holds the personality routine
2406 // pointer's encoding, followed by the pointer itself.
2407 cie->has_z_personality = true;
2408 // Fetch the personality routine pointer's encoding from the
2409 // augmentation data.
2410 if (data >= data_end) return ReportIncomplete(cie);
2411 cie->personality_encoding = DwarfPointerEncoding(*data++);
2412 if (!reader_->ValidEncoding(cie->personality_encoding)) {
2413 reporter_->InvalidPointerEncoding(cie->offset,
2414 cie->personality_encoding);
2415 return false;
2416 }
2417 if (!reader_->UsableEncoding(cie->personality_encoding)) {
2418 reporter_->UnusablePointerEncoding(cie->offset,
2419 cie->personality_encoding);
2420 return false;
2421 }
2422 // Fetch the personality routine's pointer itself from the data.
2423 cie->personality_address =
2424 reader_->ReadEncodedPointer(data, cie->personality_encoding,
2425 &len);
2426 if (len > size_t(data_end - data))
2427 return ReportIncomplete(cie);
2428 data += len;
2429 break;
2430
2431 case DW_Z_has_FDE_address_encoding:
2432 // The CIE's augmentation data holds the pointer encoding to use
2433 // for addresses in the FDE.
2434 if (data >= data_end) return ReportIncomplete(cie);
2435 cie->pointer_encoding = DwarfPointerEncoding(*data++);
2436 if (!reader_->ValidEncoding(cie->pointer_encoding)) {
2437 reporter_->InvalidPointerEncoding(cie->offset,
2438 cie->pointer_encoding);
2439 return false;
2440 }
2441 if (!reader_->UsableEncoding(cie->pointer_encoding)) {
2442 reporter_->UnusablePointerEncoding(cie->offset,
2443 cie->pointer_encoding);
2444 return false;
2445 }
2446 break;
2447
2448 case DW_Z_is_signal_trampoline:
2449 // Frames using this CIE are signal delivery frames.
2450 cie->has_z_signal_frame = true;
2451 break;
2452
2453 default:
2454 // An augmentation we don't recognize.
2455 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2456 return false;
2457 }
2458 }
2459 }
2460
2461 // The CIE's instructions start here.
2462 cie->instructions = cursor;
2463
2464 return true;
2465 }
2466
ReadFDEFields(FDE * fde)2467 bool CallFrameInfo::ReadFDEFields(FDE *fde) {
2468 const uint8_t *cursor = fde->fields;
2469 size_t size;
2470
2471 fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
2472 &size);
2473 if (size > size_t(fde->end - cursor))
2474 return ReportIncomplete(fde);
2475 cursor += size;
2476 reader_->SetFunctionBase(fde->address);
2477
2478 // For the length, we strip off the upper nybble of the encoding used for
2479 // the starting address.
2480 DwarfPointerEncoding length_encoding =
2481 DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
2482 fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
2483 if (size > size_t(fde->end - cursor))
2484 return ReportIncomplete(fde);
2485 cursor += size;
2486
2487 // If the CIE has a 'z' augmentation string, then augmentation data
2488 // appears here.
2489 if (fde->cie->has_z_augmentation) {
2490 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
2491 if (size_t(fde->end - cursor) < size + data_size)
2492 return ReportIncomplete(fde);
2493 cursor += size;
2494
2495 // In the abstract, we should walk the augmentation string, and extract
2496 // items from the FDE's augmentation data as we encounter augmentation
2497 // string characters that specify their presence: the ordering of items
2498 // in the augmentation string determines the arrangement of values in
2499 // the augmentation data.
2500 //
2501 // In practice, there's only ever one value in FDE augmentation data
2502 // that we support --- the LSDA pointer --- and we have to bail if we
2503 // see any unrecognized augmentation string characters. So if there is
2504 // anything here at all, we know what it is, and where it starts.
2505 if (fde->cie->has_z_lsda) {
2506 // Check whether the LSDA's pointer encoding is usable now: only once
2507 // we've parsed the FDE's starting address do we call reader_->
2508 // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
2509 // usable.
2510 if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
2511 reporter_->UnusablePointerEncoding(fde->cie->offset,
2512 fde->cie->lsda_encoding);
2513 return false;
2514 }
2515
2516 fde->lsda_address =
2517 reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
2518 if (size > data_size)
2519 return ReportIncomplete(fde);
2520 // Ideally, we would also complain here if there were unconsumed
2521 // augmentation data.
2522 }
2523
2524 cursor += data_size;
2525 }
2526
2527 // The FDE's instructions start after those.
2528 fde->instructions = cursor;
2529
2530 return true;
2531 }
2532
Start()2533 bool CallFrameInfo::Start() {
2534 const uint8_t *buffer_end = buffer_ + buffer_length_;
2535 const uint8_t *cursor;
2536 bool all_ok = true;
2537 const uint8_t *entry_end;
2538 bool ok;
2539
2540 // Traverse all the entries in buffer_, skipping CIEs and offering
2541 // FDEs to the handler.
2542 for (cursor = buffer_; cursor < buffer_end;
2543 cursor = entry_end, all_ok = all_ok && ok) {
2544 FDE fde;
2545
2546 // Make it easy to skip this entry with 'continue': assume that
2547 // things are not okay until we've checked all the data, and
2548 // prepare the address of the next entry.
2549 ok = false;
2550
2551 // Read the entry's prologue.
2552 if (!ReadEntryPrologue(cursor, &fde)) {
2553 if (!fde.end) {
2554 // If we couldn't even figure out this entry's extent, then we
2555 // must stop processing entries altogether.
2556 all_ok = false;
2557 break;
2558 }
2559 entry_end = fde.end;
2560 continue;
2561 }
2562
2563 // The next iteration picks up after this entry.
2564 entry_end = fde.end;
2565
2566 // Did we see an .eh_frame terminating mark?
2567 if (fde.kind == kTerminator) {
2568 // If there appears to be more data left in the section after the
2569 // terminating mark, warn the user. But this is just a warning;
2570 // we leave all_ok true.
2571 if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
2572 break;
2573 }
2574
2575 // In this loop, we skip CIEs. We only parse them fully when we
2576 // parse an FDE that refers to them. This limits our memory
2577 // consumption (beyond the buffer itself) to that needed to
2578 // process the largest single entry.
2579 if (fde.kind != kFDE) {
2580 ok = true;
2581 continue;
2582 }
2583
2584 // Validate the CIE pointer.
2585 if (fde.id > buffer_length_) {
2586 reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
2587 continue;
2588 }
2589
2590 CIE cie;
2591
2592 // Parse this FDE's CIE header.
2593 if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
2594 continue;
2595 // This had better be an actual CIE.
2596 if (cie.kind != kCIE) {
2597 reporter_->BadCIEId(fde.offset, fde.id);
2598 continue;
2599 }
2600 if (!ReadCIEFields(&cie))
2601 continue;
2602
2603 // TODO(nbilling): This could lead to strange behavior if a single buffer
2604 // contained a mixture of DWARF versions as well as address sizes. Not
2605 // sure if it's worth handling such a case.
2606
2607 // DWARF4 CIE specifies address_size, so use it for this call frame.
2608 if (cie.version >= 4) {
2609 reader_->SetAddressSize(cie.address_size);
2610 }
2611
2612 // We now have the values that govern both the CIE and the FDE.
2613 cie.cie = &cie;
2614 fde.cie = &cie;
2615
2616 // Parse the FDE's header.
2617 if (!ReadFDEFields(&fde))
2618 continue;
2619
2620 // Call Entry to ask the consumer if they're interested.
2621 if (!handler_->Entry(fde.offset, fde.address, fde.size,
2622 cie.version, cie.augmentation,
2623 cie.return_address_register)) {
2624 // The handler isn't interested in this entry. That's not an error.
2625 ok = true;
2626 continue;
2627 }
2628
2629 if (cie.has_z_augmentation) {
2630 // Report the personality routine address, if we have one.
2631 if (cie.has_z_personality) {
2632 if (!handler_
2633 ->PersonalityRoutine(cie.personality_address,
2634 IsIndirectEncoding(cie.personality_encoding)))
2635 continue;
2636 }
2637
2638 // Report the language-specific data area address, if we have one.
2639 if (cie.has_z_lsda) {
2640 if (!handler_
2641 ->LanguageSpecificDataArea(fde.lsda_address,
2642 IsIndirectEncoding(cie.lsda_encoding)))
2643 continue;
2644 }
2645
2646 // If this is a signal-handling frame, report that.
2647 if (cie.has_z_signal_frame) {
2648 if (!handler_->SignalHandler())
2649 continue;
2650 }
2651 }
2652
2653 // Interpret the CIE's instructions, and then the FDE's instructions.
2654 State state(reader_, handler_, reporter_, fde.address);
2655 ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
2656
2657 // Tell the ByteReader that the function start address from the
2658 // FDE header is no longer valid.
2659 reader_->ClearFunctionBase();
2660
2661 // Report the end of the entry.
2662 handler_->End();
2663 }
2664
2665 return all_ok;
2666 }
2667
KindName(EntryKind kind)2668 const char *CallFrameInfo::KindName(EntryKind kind) {
2669 if (kind == CallFrameInfo::kUnknown)
2670 return "entry";
2671 else if (kind == CallFrameInfo::kCIE)
2672 return "common information entry";
2673 else if (kind == CallFrameInfo::kFDE)
2674 return "frame description entry";
2675 else {
2676 assert (kind == CallFrameInfo::kTerminator);
2677 return ".eh_frame sequence terminator";
2678 }
2679 }
2680
ReportIncomplete(Entry * entry)2681 bool CallFrameInfo::ReportIncomplete(Entry *entry) {
2682 reporter_->Incomplete(entry->offset, entry->kind);
2683 return false;
2684 }
2685
Incomplete(uint64 offset,CallFrameInfo::EntryKind kind)2686 void CallFrameInfo::Reporter::Incomplete(uint64 offset,
2687 CallFrameInfo::EntryKind kind) {
2688 fprintf(stderr,
2689 "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
2690 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2691 section_.c_str());
2692 }
2693
EarlyEHTerminator(uint64 offset)2694 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
2695 fprintf(stderr,
2696 "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
2697 " before end of section contents\n",
2698 filename_.c_str(), offset, section_.c_str());
2699 }
2700
CIEPointerOutOfRange(uint64 offset,uint64 cie_offset)2701 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
2702 uint64 cie_offset) {
2703 fprintf(stderr,
2704 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2705 " CIE pointer is out of range: 0x%llx\n",
2706 filename_.c_str(), offset, section_.c_str(), cie_offset);
2707 }
2708
BadCIEId(uint64 offset,uint64 cie_offset)2709 void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
2710 fprintf(stderr,
2711 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2712 " CIE pointer does not point to a CIE: 0x%llx\n",
2713 filename_.c_str(), offset, section_.c_str(), cie_offset);
2714 }
2715
UnexpectedAddressSize(uint64 offset,uint8_t address_size)2716 void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64 offset,
2717 uint8_t address_size) {
2718 fprintf(stderr,
2719 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2720 " CIE specifies unexpected address size: %d\n",
2721 filename_.c_str(), offset, section_.c_str(), address_size);
2722 }
2723
UnexpectedSegmentSize(uint64 offset,uint8_t segment_size)2724 void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64 offset,
2725 uint8_t segment_size) {
2726 fprintf(stderr,
2727 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2728 " CIE specifies unexpected segment size: %d\n",
2729 filename_.c_str(), offset, section_.c_str(), segment_size);
2730 }
2731
UnrecognizedVersion(uint64 offset,int version)2732 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
2733 fprintf(stderr,
2734 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2735 " CIE specifies unrecognized version: %d\n",
2736 filename_.c_str(), offset, section_.c_str(), version);
2737 }
2738
UnrecognizedAugmentation(uint64 offset,const string & aug)2739 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
2740 const string &aug) {
2741 fprintf(stderr,
2742 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2743 " CIE specifies unrecognized augmentation: '%s'\n",
2744 filename_.c_str(), offset, section_.c_str(), aug.c_str());
2745 }
2746
InvalidPointerEncoding(uint64 offset,uint8 encoding)2747 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
2748 uint8 encoding) {
2749 fprintf(stderr,
2750 "%s: CFI common information entry at offset 0x%llx in '%s':"
2751 " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
2752 filename_.c_str(), offset, section_.c_str(), encoding);
2753 }
2754
UnusablePointerEncoding(uint64 offset,uint8 encoding)2755 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
2756 uint8 encoding) {
2757 fprintf(stderr,
2758 "%s: CFI common information entry at offset 0x%llx in '%s':"
2759 " 'z' augmentation specifies a pointer encoding for which"
2760 " we have no base address: 0x%02x\n",
2761 filename_.c_str(), offset, section_.c_str(), encoding);
2762 }
2763
RestoreInCIE(uint64 offset,uint64 insn_offset)2764 void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
2765 fprintf(stderr,
2766 "%s: CFI common information entry at offset 0x%llx in '%s':"
2767 " the DW_CFA_restore instruction at offset 0x%llx"
2768 " cannot be used in a common information entry\n",
2769 filename_.c_str(), offset, section_.c_str(), insn_offset);
2770 }
2771
BadInstruction(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2772 void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
2773 CallFrameInfo::EntryKind kind,
2774 uint64 insn_offset) {
2775 fprintf(stderr,
2776 "%s: CFI %s at offset 0x%llx in section '%s':"
2777 " the instruction at offset 0x%llx is unrecognized\n",
2778 filename_.c_str(), CallFrameInfo::KindName(kind),
2779 offset, section_.c_str(), insn_offset);
2780 }
2781
NoCFARule(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2782 void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
2783 CallFrameInfo::EntryKind kind,
2784 uint64 insn_offset) {
2785 fprintf(stderr,
2786 "%s: CFI %s at offset 0x%llx in section '%s':"
2787 " the instruction at offset 0x%llx assumes that a CFA rule has"
2788 " been set, but none has been set\n",
2789 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2790 section_.c_str(), insn_offset);
2791 }
2792
EmptyStateStack(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2793 void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
2794 CallFrameInfo::EntryKind kind,
2795 uint64 insn_offset) {
2796 fprintf(stderr,
2797 "%s: CFI %s at offset 0x%llx in section '%s':"
2798 " the DW_CFA_restore_state instruction at offset 0x%llx"
2799 " should pop a saved state from the stack, but the stack is empty\n",
2800 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2801 section_.c_str(), insn_offset);
2802 }
2803
ClearingCFARule(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2804 void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
2805 CallFrameInfo::EntryKind kind,
2806 uint64 insn_offset) {
2807 fprintf(stderr,
2808 "%s: CFI %s at offset 0x%llx in section '%s':"
2809 " the DW_CFA_restore_state instruction at offset 0x%llx"
2810 " would clear the CFA rule in effect\n",
2811 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2812 section_.c_str(), insn_offset);
2813 }
2814
2815 } // namespace dwarf2reader
2816