1# HG changeset patch 2# User Nathan Froyd <froydnj@mozilla.com> 3# Date 1554482109 0 4# Fri Apr 05 16:35:09 2019 +0000 5# Node ID 578c94538897c59349de77ee3c2da4252198a371 6# Parent ac23ad5ef0c18d7bf5b55fd9d07cfd1c0da33033 7Bug 524410 - part 1 - extract file information out of .debug_line parsing; r=gsvelto 8 9The DW_AT_call_file attributes that we eventually want to parse from 10DW_TAG_inlined_subroutine DIEs refer to the file name table stored in 11the .debug_line section. To resolve those DW_AT_call_file attributes, 12we need access to that table after parsing of the appropriate 13.debug_line bits is done. This patch adds support for extracting that 14information from the .debug_line parsing process. 15 16Differential Revision: https://phabricator.services.mozilla.com/D25469 17 18diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc 19--- a/src/common/dwarf_cu_to_module.cc 20+++ b/src/common/dwarf_cu_to_module.cc 21@@ -888,17 +888,18 @@ void DwarfCUToModule::SetLanguage(DwarfL 22 case dwarf2reader::DW_LANG_C89: 23 case dwarf2reader::DW_LANG_C99: 24 case dwarf2reader::DW_LANG_C_plus_plus: 25 cu_context_->language = Language::CPlusPlus; 26 break; 27 } 28 } 29 30-void DwarfCUToModule::ReadSourceLines(uint64 offset) { 31+void DwarfCUToModule::ReadSourceLines(uint64 offset, 32+ LineToModuleHandler::FileMap *files) { 33 const dwarf2reader::SectionMap &section_map 34 = cu_context_->file_context->section_map(); 35 dwarf2reader::SectionMap::const_iterator map_entry 36 = section_map.find(".debug_line"); 37 // Mac OS X puts DWARF data in sections whose names begin with "__" 38 // instead of ".". 
39 if (map_entry == section_map.end()) 40 map_entry = section_map.find("__debug_line"); 41@@ -908,17 +909,17 @@ void DwarfCUToModule::ReadSourceLines(ui 42 } 43 const uint8_t *section_start = map_entry->second.first; 44 uint64 section_length = map_entry->second.second; 45 if (offset >= section_length) { 46 cu_context_->reporter->BadLineInfoOffset(offset); 47 return; 48 } 49 line_reader_->ReadProgram(section_start + offset, section_length - offset, 50- cu_context_->file_context->module_, &lines_); 51+ cu_context_->file_context->module_, &lines_, files); 52 } 53 54 namespace { 55 class FunctionRange { 56 public: 57 FunctionRange(const Module::Range &range, Module::Function *function) : 58 address(range.address), size(range.size), function(function) { } 59 60@@ -1169,18 +1170,19 @@ void DwarfCUToModule::Finish() { 61 // no place to store our line numbers (even though the GNU toolchain 62 // will happily produce source line info for assembly language 63 // files). To avoid spurious warnings about lines we can't assign 64 // to functions, skip CUs in languages that lack functions. 65 if (!cu_context_->language->HasFunctions()) 66 return; 67 68 // Read source line info, if we have any. 69+ LineToModuleHandler::FileMap files; 70 if (has_source_line_info_) 71- ReadSourceLines(source_line_offset_); 72+ ReadSourceLines(source_line_offset_, &files); 73 74 vector<Module::Function *> *functions = &cu_context_->functions; 75 76 // Dole out lines to the appropriate functions. 77 AssignLinesToFunctions(); 78 79 // Add our functions, which now have source lines assigned to them, 80 // to module_. 81diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h 82--- a/src/common/dwarf_cu_to_module.h 83+++ b/src/common/dwarf_cu_to_module.h 84@@ -140,31 +140,35 @@ class DwarfCUToModule: public dwarf2read 85 }; 86 87 // An abstract base class for handlers that handle DWARF line data 88 // for DwarfCUToModule. 
DwarfCUToModule could certainly just use 89 // dwarf2reader::LineInfo itself directly, but decoupling things 90 // this way makes unit testing a little easier. 91 class LineToModuleHandler { 92 public: 93+ typedef std::map<uint32, Module::File*> FileMap; 94+ 95 LineToModuleHandler() { } 96 virtual ~LineToModuleHandler() { } 97 98 // Called at the beginning of a new compilation unit, prior to calling 99 // ReadProgram(). compilation_dir will indicate the path that the 100 // current compilation unit was compiled in, consistent with the 101 // DW_AT_comp_dir DIE. 102 virtual void StartCompilationUnit(const string& compilation_dir) = 0; 103 104 // Populate MODULE and LINES with source file names and code/line 105 // mappings, given a pointer to some DWARF line number data 106 // PROGRAM, and an overestimate of its size. Add no zero-length 107- // lines to LINES. 108+ // lines to LINES. If FILES is non-NULL, store the DWARF file name 109+ // table into FILES. 110 virtual void ReadProgram(const uint8_t *program, uint64 length, 111- Module *module, vector<Module::Line> *lines) = 0; 112+ Module *module, vector<Module::Line> *lines, 113+ FileMap *files) = 0; 114 }; 115 116 // The interface DwarfCUToModule uses to report warnings. The member 117 // function definitions for this class write messages to stderr, but 118 // you can override them if you'd like to detect or report these 119 // conditions yourself. 120 class WarningReporter { 121 public: 122@@ -299,17 +303,17 @@ class DwarfCUToModule: public dwarf2read 123 124 // Set this compilation unit's source language to LANGUAGE. 125 void SetLanguage(DwarfLanguage language); 126 127 // Read source line information at OFFSET in the .debug_line 128 // section. Record source files in module_, but record source lines 129 // in lines_; we apportion them to functions in 130 // AssignLinesToFunctions. 
131- void ReadSourceLines(uint64 offset); 132+ void ReadSourceLines(uint64 offset, LineToModuleHandler::FileMap *files); 133 134 // Assign the lines in lines_ to the individual line lists of the 135 // functions in functions_. (DWARF line information maps an entire 136 // compilation unit at a time, and gives no indication of which 137 // lines belong to which functions, beyond their addresses.) 138 void AssignLinesToFunctions(); 139 140 // The only reason cu_context_ and child_context_ are pointers is 141diff --git a/src/common/dwarf_line_to_module.cc b/src/common/dwarf_line_to_module.cc 142--- a/src/common/dwarf_line_to_module.cc 143+++ b/src/common/dwarf_line_to_module.cc 144@@ -58,16 +58,22 @@ static string ExpandPath(const string &p 145 const string &base) { 146 if (PathIsAbsolute(path) || base.empty()) 147 return path; 148 return base + (HasTrailingSlash(base) ? "" : "/") + path; 149 } 150 151 namespace google_breakpad { 152 153+DwarfLineToModule::~DwarfLineToModule() { 154+ if (out_files_) { 155+ *out_files_ = std::move(files_); 156+ } 157+} 158+ 159 void DwarfLineToModule::DefineDir(const string &name, uint32 dir_num) { 160 // Directory number zero is reserved to mean the compilation 161 // directory. Silently ignore attempts to redefine it. 162 if (dir_num != 0) 163 directories_[dir_num] = ExpandPath(name, compilation_dir_); 164 } 165 166 void DwarfLineToModule::DefineFile(const string &name, int32 file_num, 167diff --git a/src/common/dwarf_line_to_module.h b/src/common/dwarf_line_to_module.h 168--- a/src/common/dwarf_line_to_module.h 169+++ b/src/common/dwarf_line_to_module.h 170@@ -108,47 +108,48 @@ namespace google_breakpad { 171 // 172 // - If a line starts at address zero, omit it. (On the platforms 173 // breakpad targets, it is extremely unlikely that there will be code 174 // at address zero.) 175 // 176 // - If a line starts immediately after an omitted line, omit it too. 
177 class DwarfLineToModule: public dwarf2reader::LineInfoHandler { 178 public: 179+ typedef std::map<uint32, Module::File *> FileTable; 180+ 181 // As the DWARF line info parser passes us line records, add source 182 // files to MODULE, and add all lines to the end of LINES. LINES 183 // need not be empty. If the parser hands us a zero-length line, we 184 // omit it. If the parser hands us a line that extends beyond the 185 // end of the address space, we clip it. It's up to our client to 186 // sort out which lines belong to which functions; we don't add them 187 // to any particular function in MODULE ourselves. 188 DwarfLineToModule(Module *module, const string& compilation_dir, 189- vector<Module::Line> *lines) 190+ vector<Module::Line> *lines, FileTable *files) 191 : module_(module), 192 compilation_dir_(compilation_dir), 193 lines_(lines), 194 highest_file_number_(-1), 195 omitted_line_end_(0), 196 warned_bad_file_number_(false), 197- warned_bad_directory_number_(false) { } 198+ warned_bad_directory_number_(false), 199+ out_files_(files) { } 200 201- ~DwarfLineToModule() { } 202+ ~DwarfLineToModule(); 203 204 void DefineDir(const string &name, uint32 dir_num); 205 void DefineFile(const string &name, int32 file_num, 206 uint32 dir_num, uint64 mod_time, 207 uint64 length); 208 void AddLine(uint64 address, uint64 length, 209 uint32 file_num, uint32 line_num, uint32 column_num); 210 211 private: 212 213 typedef std::map<uint32, string> DirectoryTable; 214- typedef std::map<uint32, Module::File *> FileTable; 215- 216 // The module we're contributing debugging info to. Owned by our 217 // client. 218 Module *module_; 219 220 // The compilation directory for the current compilation unit whose 221 // lines are being accumulated. 222 string compilation_dir_; 223 224@@ -176,13 +177,15 @@ class DwarfLineToModule: public dwarf2re 225 // This is the ending address of the last line we omitted, or zero if we 226 // didn't omit the previous line. 
It is zero before we have received any 227 // AddLine calls. 228 uint64 omitted_line_end_; 229 230 // True if we've warned about: 231 bool warned_bad_file_number_; // bad file numbers 232 bool warned_bad_directory_number_; // bad directory numbers 233+ 234+ FileTable* out_files_; 235 }; 236 237 } // namespace google_breakpad 238 239 #endif // COMMON_LINUX_DWARF_LINE_TO_MODULE_H 240diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc 241--- a/src/common/linux/dump_symbols.cc 242+++ b/src/common/linux/dump_symbols.cc 243@@ -246,18 +246,19 @@ class DumperLineToModule: public DwarfCU 244 public: 245 // Create a line-to-module converter using BYTE_READER. 246 explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader) 247 : byte_reader_(byte_reader) { } 248 void StartCompilationUnit(const string& compilation_dir) { 249 compilation_dir_ = compilation_dir; 250 } 251 void ReadProgram(const uint8_t *program, uint64 length, 252- Module* module, std::vector<Module::Line>* lines) { 253- DwarfLineToModule handler(module, compilation_dir_, lines); 254+ Module* module, std::vector<Module::Line>* lines, 255+ FileMap *files) { 256+ DwarfLineToModule handler(module, compilation_dir_, lines, files); 257 dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); 258 parser.Start(); 259 } 260 private: 261 string compilation_dir_; 262 dwarf2reader::ByteReader *byte_reader_; 263 }; 264 265diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc 266--- a/src/common/mac/dump_syms.cc 267+++ b/src/common/mac/dump_syms.cc 268@@ -340,18 +340,19 @@ class DumpSymbols::DumperLineToModule: 269 DumperLineToModule(dwarf2reader::ByteReader *byte_reader) 270 : byte_reader_(byte_reader) { } 271 272 void StartCompilationUnit(const string& compilation_dir) { 273 compilation_dir_ = compilation_dir; 274 } 275 276 void ReadProgram(const uint8_t *program, uint64 length, 277- Module *module, vector<Module::Line> *lines) { 278- DwarfLineToModule 
handler(module, compilation_dir_, lines); 279+ Module *module, vector<Module::Line> *lines, 280+ FileMap *files) { 281+ DwarfLineToModule handler(module, compilation_dir_, lines, files); 282 dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); 283 parser.Start(); 284 } 285 private: 286 string compilation_dir_; 287 dwarf2reader::ByteReader *byte_reader_; // WEAK 288 }; 289 290# HG changeset patch 291# User Nathan Froyd <froydnj@mozilla.com> 292# Date 1554482109 0 293# Fri Apr 05 16:35:09 2019 +0000 294# Node ID 589e276c75fadc2f261f3edb1c8d7f59d2008d55 295# Parent 578c94538897c59349de77ee3c2da4252198a371 296Bug 524410 - part 2 - parse DW_TAG_inlined_subroutine DIEs; r=gsvelto 297 298We record the file and line that these subroutines were inlined from. 299We'll use that information to provide more coarse-grained line 300information in the next patch. 301 302Depends on D25469 303 304Differential Revision: https://phabricator.services.mozilla.com/D25470 305 306diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc 307--- a/src/common/dwarf_cu_to_module.cc 308+++ b/src/common/dwarf_cu_to_module.cc 309@@ -120,16 +120,30 @@ struct DwarfCUToModule::FilePrivate { 310 unordered_set<string> common_strings; 311 312 // A map from offsets of DIEs within the .debug_info section to 313 // Specifications describing those DIEs. Specification references can 314 // cross compilation unit boundaries. 315 SpecificationByOffset specifications; 316 317 AbstractOriginByOffset origins; 318+ 319+ struct InlinedSubroutineRange { 320+ InlinedSubroutineRange(Module::Range range, uint64 call_file, 321+ uint64 call_line) 322+ : range_(range), call_file_(call_file), call_line_(call_line) {} 323+ 324+ Module::Range range_; 325+ uint64 call_file_, call_line_; 326+ }; 327+ 328+ // A collection of address ranges with the file and line that they 329+ // correspond to. 
We'll use this information to replace the precise line 330+ // information gathered from .debug_line. 331+ vector<InlinedSubroutineRange> inlined_ranges; 332 }; 333 334 DwarfCUToModule::FileContext::FileContext(const string &filename, 335 Module *module, 336 bool handle_inter_cu_refs) 337 : filename_(filename), 338 module_(module), 339 handle_inter_cu_refs_(handle_inter_cu_refs), 340@@ -450,16 +464,114 @@ string DwarfCUToModule::GenericDIEHandle 341 spec.unqualified_name = *unqualified_name; 342 } 343 cu_context_->file_context->file_private_->specifications[offset_] = spec; 344 } 345 346 return return_value; 347 } 348 349+// A handler class for DW_TAG_inlined_subroutine DIEs. 350+class DwarfCUToModule::InlinedSubroutineHandler: public GenericDIEHandler { 351+ public: 352+ InlinedSubroutineHandler(CUContext *cu_context, DIEContext *parent_context, 353+ uint64 offset) 354+ : GenericDIEHandler(cu_context, parent_context, offset), 355+ low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr), 356+ ranges_(0), call_file_(0), call_file_set_(false), call_line_(0), 357+ call_line_set_(false) {} 358+ 359+ void ProcessAttributeUnsigned(enum DwarfAttribute attr, 360+ enum DwarfForm form, 361+ uint64 data); 362+ 363+ bool EndAttributes(); 364+ 365+ private: 366+ uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc 367+ DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. 
368+ uint64 ranges_; // DW_AT_ranges 369+ uint64 call_file_; // DW_AT_call_file 370+ bool call_file_set_; 371+ uint64 call_line_; // DW_AT_call_line 372+ bool call_line_set_; 373+}; 374+ 375+void DwarfCUToModule::InlinedSubroutineHandler::ProcessAttributeUnsigned( 376+ enum DwarfAttribute attr, 377+ enum DwarfForm form, 378+ uint64 data) { 379+ switch (attr) { 380+ case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break; 381+ case dwarf2reader::DW_AT_high_pc: 382+ high_pc_form_ = form; 383+ high_pc_ = data; 384+ break; 385+ case dwarf2reader::DW_AT_ranges: 386+ ranges_ = data; 387+ break; 388+ case dwarf2reader::DW_AT_call_file: 389+ call_file_ = data; 390+ call_file_set_ = true; 391+ break; 392+ case dwarf2reader::DW_AT_call_line: 393+ call_line_ = data; 394+ call_line_set_ = true; 395+ break; 396+ 397+ default: 398+ GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); 399+ break; 400+ } 401+} 402+ 403+bool DwarfCUToModule::InlinedSubroutineHandler::EndAttributes() { 404+ // DW_TAG_inlined_subroutine child DIEs are only information about formal 405+ // parameters and any subroutines that were further inlined, which we're 406+ // not particularly concerned about. 407+ const bool ignore_children = false; 408+ 409+ // If we didn't find complete information about what file and line we were 410+ // inlined from, then there's no point in computing anything. 411+ if (!call_file_set_ || !call_line_set_) { 412+ return ignore_children; 413+ } 414+ 415+ vector<Module::Range> ranges; 416+ 417+ if (!ranges_) { 418+ // Make high_pc_ an address, if it isn't already. 
419+ if (high_pc_form_ != dwarf2reader::DW_FORM_addr && 420+ high_pc_form_ != dwarf2reader::DW_FORM_GNU_addr_index) { 421+ high_pc_ += low_pc_; 422+ } 423+ 424+ Module::Range range(low_pc_, high_pc_ - low_pc_); 425+ ranges.push_back(range); 426+ } else { 427+ RangesHandler *ranges_handler = cu_context_->ranges_handler; 428+ 429+ if (ranges_handler) { 430+ if (!ranges_handler->ReadRanges(ranges_, cu_context_->low_pc, &ranges)) { 431+ ranges.clear(); 432+ cu_context_->reporter->MalformedRangeList(ranges_); 433+ } 434+ } else { 435+ cu_context_->reporter->MissingRanges(); 436+ } 437+ } 438+ 439+ for (const auto& range : ranges) { 440+ FilePrivate::InlinedSubroutineRange inline_range(range, call_file_, call_line_); 441+ cu_context_->file_context->file_private_->inlined_ranges.push_back(inline_range); 442+ } 443+ 444+ return ignore_children; 445+} 446+ 447 // A handler class for DW_TAG_subprogram DIEs. 448 class DwarfCUToModule::FuncHandler: public GenericDIEHandler { 449 public: 450 FuncHandler(CUContext *cu_context, DIEContext *parent_context, 451 uint64 offset) 452 : GenericDIEHandler(cu_context, parent_context, offset), 453 low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr), 454 ranges_(0), abstract_origin_(NULL), inline_(false) { } 455@@ -471,16 +583,18 @@ class DwarfCUToModule::FuncHandler: publ 456 int64 data); 457 void ProcessAttributeReference(enum DwarfAttribute attr, 458 enum DwarfForm form, 459 uint64 data); 460 461 bool EndAttributes(); 462 void Finish(); 463 464+ DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag); 465+ 466 private: 467 // The fully-qualified name, as derived from name_attribute_, 468 // specification_, parent_context_. Computed in EndAttributes. 469 string name_; 470 uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc 471 DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. 
472 uint64 ranges_; // DW_AT_ranges 473 const AbstractOrigin* abstract_origin_; 474@@ -621,16 +735,28 @@ void DwarfCUToModule::FuncHandler::Finis 475 cu_context_->functions.push_back(func.release()); 476 } 477 } else if (inline_) { 478 AbstractOrigin origin(name_); 479 cu_context_->file_context->file_private_->origins[offset_] = origin; 480 } 481 } 482 483+dwarf2reader::DIEHandler *DwarfCUToModule::FuncHandler::FindChildHandler( 484+ uint64 offset, 485+ enum DwarfTag tag) { 486+ switch (tag) { 487+ case dwarf2reader::DW_TAG_inlined_subroutine: 488+ return new InlinedSubroutineHandler(cu_context_, parent_context_, offset); 489+ 490+ default: 491+ return NULL; 492+ } 493+} 494+ 495 // A handler for DIEs that contain functions and contribute a 496 // component to their names: namespaces, classes, etc. 497 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { 498 public: 499 NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context, 500 uint64 offset) 501 : GenericDIEHandler(cu_context, parent_context, offset) { } 502 bool EndAttributes(); 503diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h 504--- a/src/common/dwarf_cu_to_module.h 505+++ b/src/common/dwarf_cu_to_module.h 506@@ -291,16 +291,17 @@ class DwarfCUToModule: public dwarf2read 507 private: 508 // Used internally by the handler. Full definitions are in 509 // dwarf_cu_to_module.cc. 510 struct CUContext; 511 struct DIEContext; 512 struct Specification; 513 class GenericDIEHandler; 514 class FuncHandler; 515+ class InlinedSubroutineHandler; 516 class NamedScopeHandler; 517 518 // A map from section offsets to specifications. 519 typedef map<uint64, Specification> SpecificationByOffset; 520 521 // Set this compilation unit's source language to LANGUAGE. 
522 void SetLanguage(DwarfLanguage language); 523 524# HG changeset patch 525# User Nathan Froyd <froydnj@mozilla.com> 526# Date 1554482109 0 527# Fri Apr 05 16:35:09 2019 +0000 528# Node ID b3e5b74ed19fcf6c6f44457accccf4bb59eebcb3 529# Parent 589e276c75fadc2f261f3edb1c8d7f59d2008d55 530Bug 524410 - part 3 - replace line information for inlined functions; r=gsvelto 531 532Differential Revision: https://phabricator.services.mozilla.com/D25471 533 534diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc 535--- a/src/common/dwarf_cu_to_module.cc 536+++ b/src/common/dwarf_cu_to_module.cc 537@@ -1090,19 +1090,123 @@ static void FillSortedFunctionRanges(vec 538 // Return true if ADDRESS falls within the range of ITEM. 539 template <class T> 540 inline bool within(const T &item, Module::Address address) { 541 // Because Module::Address is unsigned, and unsigned arithmetic 542 // wraps around, this will be false if ADDRESS falls before the 543 // start of ITEM, or if it falls after ITEM's end. 544 return address - item.address < item.size; 545 } 546+ 547+// LINES contains all the information that we have read from .debug_line. 548+// INLINES contains synthesized line information gathered from 549+// DW_TAG_inlined_subroutine DIEs. We're going to merge the two such that 550+// we have: 551+// 552+// 1. Lines from INLINES; and 553+// 2. Lines from LINES that don't overlap lines from INLINES. 554+// 555+// since the coarser-grained information from INLINES is generally what you 556+// want when getting stack traces. 
557+vector<Module::Line> MergeLines(const vector<Module::Line>& inlines, 558+ const vector<Module::Line>& lines) { 559+ vector<Module::Line> merged_lines; 560+ vector<Module::Line>::const_iterator orig_lines = lines.begin(); 561+ vector<Module::Line>::const_iterator inline_lines = inlines.begin(); 562+ vector<Module::Line>::const_iterator orig_end = lines.end(); 563+ vector<Module::Line>::const_iterator inline_end = inlines.end(); 564+ 565+ while (true) { 566+ if (orig_lines == orig_end) { 567+ break; 568+ } 569+ 570+ if (inline_lines == inline_end) { 571+ merged_lines.push_back(*orig_lines); 572+ ++orig_lines; 573+ continue; 574+ } 575+ 576+ // We are going to make the simplifying assumption that an inline line 577+ // will *always* start at the exact position of some original line. 578+ // This assumption significantly reduces the number of cases we have 579+ // to consider. 580+ 581+ // If we haven't caught up to where the inline lines are, keep going. 582+ if (orig_lines->address < inline_lines->address) { 583+ merged_lines.push_back(*orig_lines); 584+ ++orig_lines; 585+ continue; 586+ } 587+ 588+ // We found some overlap! See how far we can go, and merge the inline 589+ // line into our list. 590+ if (orig_lines->address == inline_lines->address) { 591+ auto start = orig_lines + 1; 592+ while (start != orig_end && 593+ (start->address - inline_lines->address) < inline_lines->size) { 594+ ++start; 595+ } 596+ 597+ // start now points just beyond the range covered by *inline_lines. 598+ // But we might have encountered a case like: 599+ // 600+ // | OL1 | OL2 | OL3 | ... | OLN | ... 601+ // | IL1 | <gap> | IL2 ... 602+ // 603+ // where the end of the inline line splits the last original line that 604+ // we've seen in two. This case seems like a bug in the debug 605+ // information, but we have to deal with it intelligently. There are 606+ // several options available: 607+ // 608+ // 1. 
Split OLN into two parts: the part covered by IL1 and the part 609+ // not covered. Merge IL1 and then merge the latter part. 610+ // 2. Extend IL1 to cover the entirety of OLN, and merge IL1. 611+ // 612+ // Note that due to our simplifying assumption that any inline lines 613+ // start exactly on some original line, we do not have to consider 614+ // the case: 615+ // 616+ // | OL1 | OL2 | OL3 | ... | OLN | ... 617+ // | IL1 | IL2 ... 618+ // 619+ // where two inline lines overlap the range of one original line. 620+ // 621+ // The conservative solution is option 1, which preserves as much of 622+ // the original information as possible. Let's go with that. 623+ merged_lines.push_back(*inline_lines); 624+ auto overlapped = start - 1; 625+ if (within(*overlapped, inline_lines->address + inline_lines->size)) { 626+ // Create a line that covers the rest of the space and merge that. 627+ Module::Line rest; 628+ rest.address = inline_lines->address + inline_lines->size; 629+ rest.size = overlapped->address + overlapped->size - rest.address; 630+ rest.file = overlapped->file; 631+ rest.number = overlapped->number; 632+ merged_lines.push_back(rest); 633+ } 634+ 635+ ++inline_lines; 636+ orig_lines = start; 637+ continue; 638+ } 639+ 640+ // This case is weird: we have inlined lines that exist prior to any 641+ // lines recorded in our debug information. Just skip them. 642+ if (orig_lines->address > inline_lines->address) { 643+ ++inline_lines; 644+ continue; 645+ } 646+ } 647+ 648+ return merged_lines; 649+} 650 } 651 652-void DwarfCUToModule::AssignLinesToFunctions() { 653+void DwarfCUToModule::AssignLinesToFunctions(const LineToModuleHandler::FileMap &files) { 654 vector<Module::Function *> *functions = &cu_context_->functions; 655 WarningReporter *reporter = cu_context_->reporter; 656 657 // This would be simpler if we assumed that source line entries 658 // don't cross function boundaries. 
However, there's no real reason 659 // to assume that (say) a series of function definitions on the same 660 // line wouldn't get coalesced into one line number entry. The 661 // DWARF spec certainly makes no such promises. 662@@ -1112,16 +1216,40 @@ void DwarfCUToModule::AssignLinesToFunct 663 // the hair here is a constant factor for performance; the 664 // complexity from here on out is linear. 665 666 // Put both our functions and lines in order by address. 667 std::sort(functions->begin(), functions->end(), 668 Module::Function::CompareByAddress); 669 std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress); 670 671+ // Prepare a sorted list of lines containing inlined subroutines. 672+ vector<Module::Line> inlines; 673+ 674+ for (const auto& range : cu_context_->file_context->file_private_->inlined_ranges) { 675+ auto f = files.find(range.call_file_); 676+ if (f == files.end()) { 677+ // Uh, that's weird. Skip this? 678+ continue; 679+ } 680+ 681+ Module::Line line; 682+ line.address = range.range_.address; 683+ line.size = range.range_.size; 684+ line.number = range.call_line_; 685+ line.file = f->second; 686+ inlines.push_back(line); 687+ } 688+ std::sort(inlines.begin(), inlines.end(), Module::Line::CompareByAddress); 689+ 690+ if (!inlines.empty()) { 691+ vector<Module::Line> merged_lines = MergeLines(inlines, lines_); 692+ lines_ = std::move(merged_lines); 693+ } 694+ 695 // The last line that we used any piece of. We use this only for 696 // generating warnings. 697 const Module::Line *last_line_used = NULL; 698 699 // The last function and line we warned about --- so we can avoid 700 // doing so more than once. 701 const Module::Function *last_function_cited = NULL; 702 const Module::Line *last_line_cited = NULL; 703@@ -1303,17 +1431,17 @@ void DwarfCUToModule::Finish() { 704 // Read source line info, if we have any. 
705 LineToModuleHandler::FileMap files; 706 if (has_source_line_info_) 707 ReadSourceLines(source_line_offset_, &files); 708 709 vector<Module::Function *> *functions = &cu_context_->functions; 710 711 // Dole out lines to the appropriate functions. 712- AssignLinesToFunctions(); 713+ AssignLinesToFunctions(files); 714 715 // Add our functions, which now have source lines assigned to them, 716 // to module_. 717 cu_context_->file_context->module_->AddFunctions(functions->begin(), 718 functions->end()); 719 720 // Ownership of the function objects has shifted from cu_context to 721 // the Module. 722diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h 723--- a/src/common/dwarf_cu_to_module.h 724+++ b/src/common/dwarf_cu_to_module.h 725@@ -310,17 +310,17 @@ class DwarfCUToModule: public dwarf2read 726 // in lines_; we apportion them to functions in 727 // AssignLinesToFunctions. 728 void ReadSourceLines(uint64 offset, LineToModuleHandler::FileMap *files); 729 730 // Assign the lines in lines_ to the individual line lists of the 731 // functions in functions_. (DWARF line information maps an entire 732 // compilation unit at a time, and gives no indication of which 733 // lines belong to which functions, beyond their addresses.) 734- void AssignLinesToFunctions(); 735+ void AssignLinesToFunctions(const LineToModuleHandler::FileMap &files); 736 737 // The only reason cu_context_ and child_context_ are pointers is 738 // that we want to keep their definitions private to 739 // dwarf_cu_to_module.cc, instead of listing them all here. They are 740 // owned by this DwarfCUToModule: the constructor sets them, and the 741 // destructor deletes them. 742 743 // The handler to use to handle line number data. 
744# HG changeset patch 745# User Nathan Froyd <froydnj@mozilla.com> 746# Date 1554482110 0 747# Fri Apr 05 16:35:10 2019 +0000 748# Node ID d048bcf083e5df1547230ecc780139a264389889 749# Parent b3e5b74ed19fcf6c6f44457accccf4bb59eebcb3 750Bug 524410 - part 4 - look through lexical block DIEs where appropriate; r=gsvelto 751 752DW_TAG_subprogram DIEs sometimes have child DW_TAG_lexical_block DIEs 753which in turn contain child DW_TAG_inlined_subroutine DIEs that we would 754like to look at. If we skip the DW_TAG_inlined_subroutine DIEs, we miss 755important information. We therefore need to look through the 756DW_TAG_lexical_block DIEs to find the DIEs that we are interested in. 757 758Depends on D25471 759 760Differential Revision: https://phabricator.services.mozilla.com/D25472 761 762diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc 763--- a/src/common/dwarf_cu_to_module.cc 764+++ b/src/common/dwarf_cu_to_module.cc 765@@ -562,16 +562,46 @@ bool DwarfCUToModule::InlinedSubroutineH 766 for (const auto& range : ranges) { 767 FilePrivate::InlinedSubroutineRange inline_range(range, call_file_, call_line_); 768 cu_context_->file_context->file_private_->inlined_ranges.push_back(inline_range); 769 } 770 771 return ignore_children; 772 } 773 774+// A handler class for DW_TAG_lexical_block DIEs. 775+class DwarfCUToModule::LexicalBlockHandler: public GenericDIEHandler { 776+ public: 777+ LexicalBlockHandler(CUContext *cu_context, DIEContext *parent_context, 778+ uint64 offset) 779+ : GenericDIEHandler(cu_context, parent_context, offset) {} 780+ 781+ bool EndAttributes(); 782+ 783+ DIEHandler* FindChildHandler(uint64 offset, enum DwarfTag tag); 784+}; 785+ 786+ 787+bool DwarfCUToModule::LexicalBlockHandler::EndAttributes() { 788+ // Parse child DIEs if possible. 
789+ return true; 790+} 791+ 792+dwarf2reader::DIEHandler* DwarfCUToModule::LexicalBlockHandler::FindChildHandler( 793+ uint64 offset, 794+ enum DwarfTag tag) { 795+ switch (tag) { 796+ case dwarf2reader::DW_TAG_inlined_subroutine: 797+ return new InlinedSubroutineHandler(cu_context_, parent_context_, offset); 798+ 799+ default: 800+ return NULL; 801+ } 802+} 803+ 804 // A handler class for DW_TAG_subprogram DIEs. 805 class DwarfCUToModule::FuncHandler: public GenericDIEHandler { 806 public: 807 FuncHandler(CUContext *cu_context, DIEContext *parent_context, 808 uint64 offset) 809 : GenericDIEHandler(cu_context, parent_context, offset), 810 low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr), 811 ranges_(0), abstract_origin_(NULL), inline_(false) { } 812@@ -742,16 +772,24 @@ void DwarfCUToModule::FuncHandler::Finis 813 814 dwarf2reader::DIEHandler *DwarfCUToModule::FuncHandler::FindChildHandler( 815 uint64 offset, 816 enum DwarfTag tag) { 817 switch (tag) { 818 case dwarf2reader::DW_TAG_inlined_subroutine: 819 return new InlinedSubroutineHandler(cu_context_, parent_context_, offset); 820 821+ // Compilers will sometimes give DW_TAG_subprogram DIEs 822+ // DW_TAG_lexical_block children DIEs, which then in turn contain 823+ // DW_TAG_inlined_subroutine DIEs. We want to parse those 824+ // grandchildren as though they belonged to the original 825+ // DW_TAG_subprogram DIE. 826+ case dwarf2reader::DW_TAG_lexical_block: 827+ return new LexicalBlockHandler(cu_context_, parent_context_, offset); 828+ 829 default: 830 return NULL; 831 } 832 } 833 834 // A handler for DIEs that contain functions and contribute a 835 // component to their names: namespaces, classes, etc. 
836 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { 837diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h 838--- a/src/common/dwarf_cu_to_module.h 839+++ b/src/common/dwarf_cu_to_module.h 840@@ -293,16 +293,17 @@ class DwarfCUToModule: public dwarf2read 841 // dwarf_cu_to_module.cc. 842 struct CUContext; 843 struct DIEContext; 844 struct Specification; 845 class GenericDIEHandler; 846 class FuncHandler; 847 class InlinedSubroutineHandler; 848 class NamedScopeHandler; 849+ class LexicalBlockHandler; 850 851 // A map from section offsets to specifications. 852 typedef map<uint64, Specification> SpecificationByOffset; 853 854 // Set this compilation unit's source language to LANGUAGE. 855 void SetLanguage(DwarfLanguage language); 856 857 // Read source line information at OFFSET in the .debug_line 858# HG changeset patch 859# User Nathan Froyd <froydnj@mozilla.com> 860# Date 1554482110 0 861# Fri Apr 05 16:35:10 2019 +0000 862# Node ID 17040bb20e256476df548a6be770e7d8f78387ef 863# Parent d048bcf083e5df1547230ecc780139a264389889 864Bug 524410 - part 5 - merge adjacent line records where possible; r=gsvelto 865 866After replacing precise line information from .debug_line with coarse 867line information from DW_AT_call_{file,line}, it's very likely that 868adjacent line records actually refer to identical file and line 869numbers. Such adjacent records are not really useful and take up more 870space than they should in the symbol file. We might as well merge them 871and save ourselves some space. 
872 873Differential Revision: https://phabricator.services.mozilla.com/D25473 874 875diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc 876--- a/src/common/dwarf_cu_to_module.cc 877+++ b/src/common/dwarf_cu_to_module.cc 878@@ -1232,16 +1232,57 @@ vector<Module::Line> MergeLines(const ve 879 if (orig_lines->address > inline_lines->address) { 880 ++inline_lines; 881 continue; 882 } 883 } 884 885 return merged_lines; 886 } 887+ 888+// After merging the line information, we may have adjacent lines that belong 889+// to the same file and line number. (The compiler shouldn't be producing 890+// such line records on its own.) Let's merge adjacent lines where possible 891+// to make symbol files smaller. 892+void CollapseAdjacentLines(vector<Module::Line>& lines) { 893+ if (lines.empty()) { 894+ return; 895+ } 896+ 897+ auto merging_into = lines.begin(); 898+ auto next = merging_into + 1; 899+ const auto end = lines.end(); 900+ 901+ while (next != end) { 902+ // The next record might be able to be merged. 903+ if ((merging_into->address + merging_into->size) == next->address && 904+ merging_into->file == next->file && 905+ merging_into->number == next->number) { 906+ merging_into->size = next->address + next->size - merging_into->address; 907+ ++next; 908+ continue; 909+ } 910+ 911+ // We've merged all we can into this record. Move on. 912+ ++merging_into; 913+ 914+ // next now points at the most recent record that wasn't able to be 915+ // merged with a previous record. We may still have more records to 916+ // consider, and if merging_into and next have become discontiguous, 917+ // we need to copy things around. 
918+ if (next != end) { 919+ if (next != merging_into) { 920+ *merging_into = std::move(*next); 921+ } 922+ ++next; 923+ } 924+ } 925+ 926+ lines.erase(merging_into + 1, end); 927+} 928 } 929 930 void DwarfCUToModule::AssignLinesToFunctions(const LineToModuleHandler::FileMap &files) { 931 vector<Module::Function *> *functions = &cu_context_->functions; 932 WarningReporter *reporter = cu_context_->reporter; 933 934 // This would be simpler if we assumed that source line entries 935 // don't cross function boundaries. However, there's no real reason 936@@ -1275,16 +1316,19 @@ void DwarfCUToModule::AssignLinesToFunct 937 line.number = range.call_line_; 938 line.file = f->second; 939 inlines.push_back(line); 940 } 941 std::sort(inlines.begin(), inlines.end(), Module::Line::CompareByAddress); 942 943 if (!inlines.empty()) { 944 vector<Module::Line> merged_lines = MergeLines(inlines, lines_); 945+ 946+ CollapseAdjacentLines(merged_lines); 947+ 948 lines_ = std::move(merged_lines); 949 } 950 951 // The last line that we used any piece of. We use this only for 952 // generating warnings. 953 const Module::Line *last_line_used = NULL; 954 955 // The last function and line we warned about --- so we can avoid 956