1 //===------ dwarf2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/BinaryFormat/Dwarf.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
12 #include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
13 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
14 #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
15 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
18 #include "llvm/ObjectYAML/DWARFYAML.h"
19 
20 #include <algorithm>
21 #include <optional>
22 
23 using namespace llvm;
24 
dumpDebugAbbrev(DWARFContext & DCtx,DWARFYAML::Data & Y)25 void dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
26   auto AbbrevSetPtr = DCtx.getDebugAbbrev();
27   if (AbbrevSetPtr) {
28     uint64_t AbbrevTableID = 0;
29     for (auto AbbrvDeclSet : *AbbrevSetPtr) {
30       Y.DebugAbbrev.emplace_back();
31       Y.DebugAbbrev.back().ID = AbbrevTableID++;
32       for (auto AbbrvDecl : AbbrvDeclSet.second) {
33         DWARFYAML::Abbrev Abbrv;
34         Abbrv.Code = AbbrvDecl.getCode();
35         Abbrv.Tag = AbbrvDecl.getTag();
36         Abbrv.Children = AbbrvDecl.hasChildren() ? dwarf::DW_CHILDREN_yes
37                                                  : dwarf::DW_CHILDREN_no;
38         for (auto Attribute : AbbrvDecl.attributes()) {
39           DWARFYAML::AttributeAbbrev AttAbrv;
40           AttAbrv.Attribute = Attribute.Attr;
41           AttAbrv.Form = Attribute.Form;
42           if (AttAbrv.Form == dwarf::DW_FORM_implicit_const)
43             AttAbrv.Value = Attribute.getImplicitConstValue();
44           Abbrv.Attributes.push_back(AttAbrv);
45         }
46         Y.DebugAbbrev.back().Table.push_back(Abbrv);
47       }
48     }
49   }
50 }
51 
dumpDebugAddr(DWARFContext & DCtx,DWARFYAML::Data & Y)52 Error dumpDebugAddr(DWARFContext &DCtx, DWARFYAML::Data &Y) {
53   DWARFDebugAddrTable AddrTable;
54   DWARFDataExtractor AddrData(DCtx.getDWARFObj(),
55                               DCtx.getDWARFObj().getAddrSection(),
56                               DCtx.isLittleEndian(), /*AddressSize=*/0);
57   std::vector<DWARFYAML::AddrTableEntry> AddrTables;
58   uint64_t Offset = 0;
59   while (AddrData.isValidOffset(Offset)) {
60     // We ignore any errors that don't prevent parsing the section, since we can
61     // still represent such sections.
62     if (Error Err = AddrTable.extractV5(AddrData, &Offset, /*CUAddrSize=*/0,
63                                         consumeError))
64       return Err;
65     AddrTables.emplace_back();
66     for (uint64_t Addr : AddrTable.getAddressEntries()) {
67       // Currently, the parser doesn't support parsing an address table with non
68       // linear addresses (segment_selector_size != 0). The segment selectors
69       // are specified to be zero.
70       AddrTables.back().SegAddrPairs.push_back(
71           {/*SegmentSelector=*/0, /*Address=*/Addr});
72     }
73 
74     AddrTables.back().Format = AddrTable.getFormat();
75     AddrTables.back().Length = AddrTable.getLength();
76     AddrTables.back().Version = AddrTable.getVersion();
77     AddrTables.back().AddrSize = AddrTable.getAddressSize();
78     AddrTables.back().SegSelectorSize = AddrTable.getSegmentSelectorSize();
79   }
80   Y.DebugAddr = std::move(AddrTables);
81   return Error::success();
82 }
83 
dumpDebugStrings(DWARFContext & DCtx,DWARFYAML::Data & Y)84 Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
85   DataExtractor StrData = DCtx.getStringExtractor();
86   uint64_t Offset = 0;
87   std::vector<StringRef> DebugStr;
88   Error Err = Error::success();
89   while (StrData.isValidOffset(Offset)) {
90     const char *CStr = StrData.getCStr(&Offset, &Err);
91     if (Err)
92       return Err;
93     DebugStr.push_back(CStr);
94   }
95 
96   Y.DebugStrings = DebugStr;
97   return Err;
98 }
99 
dumpDebugARanges(DWARFContext & DCtx,DWARFYAML::Data & Y)100 Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
101   DWARFDataExtractor ArangesData(DCtx.getDWARFObj().getArangesSection(),
102                                  DCtx.isLittleEndian(), 0);
103   uint64_t Offset = 0;
104   DWARFDebugArangeSet Set;
105   std::vector<DWARFYAML::ARange> DebugAranges;
106 
107   // We ignore any errors that don't prevent parsing the section, since we can
108   // still represent such sections. These errors are recorded via the
109   // WarningHandler parameter of Set.extract().
110   auto DiscardError = [](Error Err) { consumeError(std::move(Err)); };
111 
112   while (ArangesData.isValidOffset(Offset)) {
113     if (Error E = Set.extract(ArangesData, &Offset, DiscardError))
114       return E;
115     DWARFYAML::ARange Range;
116     Range.Format = Set.getHeader().Format;
117     Range.Length = Set.getHeader().Length;
118     Range.Version = Set.getHeader().Version;
119     Range.CuOffset = Set.getHeader().CuOffset;
120     Range.AddrSize = Set.getHeader().AddrSize;
121     Range.SegSize = Set.getHeader().SegSize;
122     for (auto Descriptor : Set.descriptors()) {
123       DWARFYAML::ARangeDescriptor Desc;
124       Desc.Address = Descriptor.Address;
125       Desc.Length = Descriptor.Length;
126       Range.Descriptors.push_back(Desc);
127     }
128     DebugAranges.push_back(Range);
129   }
130 
131   Y.DebugAranges = DebugAranges;
132   return ErrorSuccess();
133 }
134 
dumpDebugRanges(DWARFContext & DCtx,DWARFYAML::Data & Y)135 Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
136   // We are assuming all address byte sizes will be consistent across all
137   // compile units.
138   uint8_t AddrSize = 0;
139   for (const auto &CU : DCtx.compile_units()) {
140     const uint8_t CUAddrSize = CU->getAddressByteSize();
141     if (AddrSize == 0)
142       AddrSize = CUAddrSize;
143     else if (CUAddrSize != AddrSize)
144       return createStringError(std::errc::invalid_argument,
145                                "address sizes vary in different compile units");
146   }
147 
148   DWARFDataExtractor Data(DCtx.getDWARFObj().getRangesSection().Data,
149                           DCtx.isLittleEndian(), AddrSize);
150   uint64_t Offset = 0;
151   DWARFDebugRangeList DwarfRanges;
152   std::vector<DWARFYAML::Ranges> DebugRanges;
153 
154   while (Data.isValidOffset(Offset)) {
155     DWARFYAML::Ranges YamlRanges;
156     YamlRanges.Offset = Offset;
157     YamlRanges.AddrSize = AddrSize;
158     if (Error E = DwarfRanges.extract(Data, &Offset))
159       return E;
160     for (const auto &RLE : DwarfRanges.getEntries())
161       YamlRanges.Entries.push_back({RLE.StartAddress, RLE.EndAddress});
162     DebugRanges.push_back(std::move(YamlRanges));
163   }
164 
165   Y.DebugRanges = DebugRanges;
166   return ErrorSuccess();
167 }
168 
169 static std::optional<DWARFYAML::PubSection>
dumpPubSection(const DWARFContext & DCtx,const DWARFSection & Section,bool IsGNUStyle)170 dumpPubSection(const DWARFContext &DCtx, const DWARFSection &Section,
171                bool IsGNUStyle) {
172   DWARFYAML::PubSection Y;
173   DWARFDataExtractor PubSectionData(DCtx.getDWARFObj(), Section,
174                                     DCtx.isLittleEndian(), 0);
175   DWARFDebugPubTable Table;
176   // We ignore any errors that don't prevent parsing the section, since we can
177   // still represent such sections.
178   Table.extract(PubSectionData, IsGNUStyle,
179                 [](Error Err) { consumeError(std::move(Err)); });
180   ArrayRef<DWARFDebugPubTable::Set> Sets = Table.getData();
181   if (Sets.empty())
182     return std::nullopt;
183 
184   // FIXME: Currently, obj2yaml only supports dumping the first pubtable.
185   Y.Format = Sets[0].Format;
186   Y.Length = Sets[0].Length;
187   Y.Version = Sets[0].Version;
188   Y.UnitOffset = Sets[0].Offset;
189   Y.UnitSize = Sets[0].Size;
190 
191   for (const DWARFDebugPubTable::Entry &E : Sets[0].Entries)
192     Y.Entries.push_back(DWARFYAML::PubEntry{(uint32_t)E.SecOffset,
193                                             E.Descriptor.toBits(), E.Name});
194 
195   return Y;
196 }
197 
dumpDebugPubSections(DWARFContext & DCtx,DWARFYAML::Data & Y)198 void dumpDebugPubSections(DWARFContext &DCtx, DWARFYAML::Data &Y) {
199   const DWARFObject &D = DCtx.getDWARFObj();
200 
201   Y.PubNames =
202       dumpPubSection(DCtx, D.getPubnamesSection(), /*IsGNUStyle=*/false);
203   Y.PubTypes =
204       dumpPubSection(DCtx, D.getPubtypesSection(), /*IsGNUStyle=*/false);
205   // TODO: Test dumping .debug_gnu_pubnames section.
206   Y.GNUPubNames =
207       dumpPubSection(DCtx, D.getGnuPubnamesSection(), /*IsGNUStyle=*/true);
208   // TODO: Test dumping .debug_gnu_pubtypes section.
209   Y.GNUPubTypes =
210       dumpPubSection(DCtx, D.getGnuPubtypesSection(), /*IsGNUStyle=*/true);
211 }
212 
dumpDebugInfo(DWARFContext & DCtx,DWARFYAML::Data & Y)213 void dumpDebugInfo(DWARFContext &DCtx, DWARFYAML::Data &Y) {
214   for (const auto &CU : DCtx.compile_units()) {
215     DWARFYAML::Unit NewUnit;
216     NewUnit.Format = CU->getFormat();
217     NewUnit.Length = CU->getLength();
218     NewUnit.Version = CU->getVersion();
219     if (NewUnit.Version >= 5)
220       NewUnit.Type = (dwarf::UnitType)CU->getUnitType();
221     const DWARFDebugAbbrev *DebugAbbrev = DCtx.getDebugAbbrev();
222     NewUnit.AbbrevTableID = std::distance(
223         DebugAbbrev->begin(),
224         llvm::find_if(
225             *DebugAbbrev,
226             [&](const std::pair<uint64_t, DWARFAbbreviationDeclarationSet> &P) {
227               return P.first == CU->getAbbreviations()->getOffset();
228             }));
229     NewUnit.AbbrOffset = CU->getAbbreviations()->getOffset();
230     NewUnit.AddrSize = CU->getAddressByteSize();
231     for (auto DIE : CU->dies()) {
232       DWARFYAML::Entry NewEntry;
233       DataExtractor EntryData = CU->getDebugInfoExtractor();
234       uint64_t offset = DIE.getOffset();
235 
236       assert(EntryData.isValidOffset(offset) && "Invalid DIE Offset");
237       if (!EntryData.isValidOffset(offset))
238         continue;
239 
240       NewEntry.AbbrCode = EntryData.getULEB128(&offset);
241 
242       auto AbbrevDecl = DIE.getAbbreviationDeclarationPtr();
243       if (AbbrevDecl) {
244         for (const auto &AttrSpec : AbbrevDecl->attributes()) {
245           DWARFYAML::FormValue NewValue;
246           NewValue.Value = 0xDEADBEEFDEADBEEF;
247           DWARFDie DIEWrapper(CU.get(), &DIE);
248           auto FormValue = DIEWrapper.find(AttrSpec.Attr);
249           if (!FormValue)
250             return;
251           auto Form = FormValue->getForm();
252           bool indirect = false;
253           do {
254             indirect = false;
255             switch (Form) {
256             case dwarf::DW_FORM_addr:
257             case dwarf::DW_FORM_GNU_addr_index:
258               if (auto Val = FormValue->getAsAddress())
259                 NewValue.Value = *Val;
260               break;
261             case dwarf::DW_FORM_ref_addr:
262             case dwarf::DW_FORM_ref1:
263             case dwarf::DW_FORM_ref2:
264             case dwarf::DW_FORM_ref4:
265             case dwarf::DW_FORM_ref8:
266             case dwarf::DW_FORM_ref_udata:
267             case dwarf::DW_FORM_ref_sig8:
268               if (auto Val = FormValue->getAsReferenceUVal())
269                 NewValue.Value = *Val;
270               break;
271             case dwarf::DW_FORM_exprloc:
272             case dwarf::DW_FORM_block:
273             case dwarf::DW_FORM_block1:
274             case dwarf::DW_FORM_block2:
275             case dwarf::DW_FORM_block4:
276               if (auto Val = FormValue->getAsBlock()) {
277                 auto BlockData = *Val;
278                 std::copy(BlockData.begin(), BlockData.end(),
279                           std::back_inserter(NewValue.BlockData));
280               }
281               NewValue.Value = NewValue.BlockData.size();
282               break;
283             case dwarf::DW_FORM_data1:
284             case dwarf::DW_FORM_flag:
285             case dwarf::DW_FORM_data2:
286             case dwarf::DW_FORM_data4:
287             case dwarf::DW_FORM_data8:
288             case dwarf::DW_FORM_sdata:
289             case dwarf::DW_FORM_udata:
290             case dwarf::DW_FORM_ref_sup4:
291             case dwarf::DW_FORM_ref_sup8:
292               if (auto Val = FormValue->getAsUnsignedConstant())
293                 NewValue.Value = *Val;
294               break;
295             case dwarf::DW_FORM_string:
296               if (auto Val = dwarf::toString(FormValue))
297                 NewValue.CStr = *Val;
298               break;
299             case dwarf::DW_FORM_indirect:
300               indirect = true;
301               if (auto Val = FormValue->getAsUnsignedConstant()) {
302                 NewValue.Value = *Val;
303                 NewEntry.Values.push_back(NewValue);
304                 Form = static_cast<dwarf::Form>(*Val);
305               }
306               break;
307             case dwarf::DW_FORM_strp:
308             case dwarf::DW_FORM_sec_offset:
309             case dwarf::DW_FORM_GNU_ref_alt:
310             case dwarf::DW_FORM_GNU_strp_alt:
311             case dwarf::DW_FORM_line_strp:
312             case dwarf::DW_FORM_strp_sup:
313             case dwarf::DW_FORM_GNU_str_index:
314             case dwarf::DW_FORM_strx:
315               if (auto Val = FormValue->getAsCStringOffset())
316                 NewValue.Value = *Val;
317               break;
318             case dwarf::DW_FORM_flag_present:
319               NewValue.Value = 1;
320               break;
321             default:
322               break;
323             }
324           } while (indirect);
325           NewEntry.Values.push_back(NewValue);
326         }
327       }
328 
329       NewUnit.Entries.push_back(NewEntry);
330     }
331     Y.CompileUnits.push_back(NewUnit);
332   }
333 }
334 
dumpFileEntry(DataExtractor & Data,uint64_t & Offset,DWARFYAML::File & File)335 bool dumpFileEntry(DataExtractor &Data, uint64_t &Offset,
336                    DWARFYAML::File &File) {
337   File.Name = Data.getCStr(&Offset);
338   if (File.Name.empty())
339     return false;
340   File.DirIdx = Data.getULEB128(&Offset);
341   File.ModTime = Data.getULEB128(&Offset);
342   File.Length = Data.getULEB128(&Offset);
343   return true;
344 }
345 
dumpDebugLines(DWARFContext & DCtx,DWARFYAML::Data & Y)346 void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
347   for (const auto &CU : DCtx.compile_units()) {
348     auto CUDIE = CU->getUnitDIE();
349     if (!CUDIE)
350       continue;
351     if (auto StmtOffset =
352             dwarf::toSectionOffset(CUDIE.find(dwarf::DW_AT_stmt_list))) {
353       DWARFYAML::LineTable DebugLines;
354       DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
355                              DCtx.isLittleEndian(), CU->getAddressByteSize());
356       uint64_t Offset = *StmtOffset;
357       uint64_t LengthOrDWARF64Prefix = LineData.getU32(&Offset);
358       if (LengthOrDWARF64Prefix == dwarf::DW_LENGTH_DWARF64) {
359         DebugLines.Format = dwarf::DWARF64;
360         DebugLines.Length = LineData.getU64(&Offset);
361       } else {
362         DebugLines.Format = dwarf::DWARF32;
363         DebugLines.Length = LengthOrDWARF64Prefix;
364       }
365       assert(DebugLines.Length);
366       uint64_t LineTableLength = *DebugLines.Length;
367       uint64_t SizeOfPrologueLength =
368           DebugLines.Format == dwarf::DWARF64 ? 8 : 4;
369       DebugLines.Version = LineData.getU16(&Offset);
370       DebugLines.PrologueLength =
371           LineData.getUnsigned(&Offset, SizeOfPrologueLength);
372       assert(DebugLines.PrologueLength);
373       const uint64_t EndPrologue = *DebugLines.PrologueLength + Offset;
374 
375       DebugLines.MinInstLength = LineData.getU8(&Offset);
376       if (DebugLines.Version >= 4)
377         DebugLines.MaxOpsPerInst = LineData.getU8(&Offset);
378       DebugLines.DefaultIsStmt = LineData.getU8(&Offset);
379       DebugLines.LineBase = LineData.getU8(&Offset);
380       DebugLines.LineRange = LineData.getU8(&Offset);
381       DebugLines.OpcodeBase = LineData.getU8(&Offset);
382 
383       DebugLines.StandardOpcodeLengths.emplace();
384       for (uint8_t i = 1; i < DebugLines.OpcodeBase; ++i)
385         DebugLines.StandardOpcodeLengths->push_back(LineData.getU8(&Offset));
386 
387       while (Offset < EndPrologue) {
388         StringRef Dir = LineData.getCStr(&Offset);
389         if (!Dir.empty())
390           DebugLines.IncludeDirs.push_back(Dir);
391         else
392           break;
393       }
394 
395       while (Offset < EndPrologue) {
396         DWARFYAML::File TmpFile;
397         if (dumpFileEntry(LineData, Offset, TmpFile))
398           DebugLines.Files.push_back(TmpFile);
399         else
400           break;
401       }
402 
403       const uint64_t LineEnd =
404           LineTableLength + *StmtOffset + SizeOfPrologueLength;
405       while (Offset < LineEnd) {
406         DWARFYAML::LineTableOpcode NewOp = {};
407         NewOp.Opcode = (dwarf::LineNumberOps)LineData.getU8(&Offset);
408         if (NewOp.Opcode == 0) {
409           auto StartExt = Offset;
410           NewOp.ExtLen = LineData.getULEB128(&Offset);
411           NewOp.SubOpcode =
412               (dwarf::LineNumberExtendedOps)LineData.getU8(&Offset);
413           switch (NewOp.SubOpcode) {
414           case dwarf::DW_LNE_set_address:
415           case dwarf::DW_LNE_set_discriminator:
416             NewOp.Data = LineData.getAddress(&Offset);
417             break;
418           case dwarf::DW_LNE_define_file:
419             dumpFileEntry(LineData, Offset, NewOp.FileEntry);
420             break;
421           case dwarf::DW_LNE_end_sequence:
422             break;
423           default:
424             while (Offset < StartExt + *NewOp.ExtLen)
425               NewOp.UnknownOpcodeData.push_back(LineData.getU8(&Offset));
426           }
427         } else if (NewOp.Opcode < *DebugLines.OpcodeBase) {
428           switch (NewOp.Opcode) {
429           case dwarf::DW_LNS_copy:
430           case dwarf::DW_LNS_negate_stmt:
431           case dwarf::DW_LNS_set_basic_block:
432           case dwarf::DW_LNS_const_add_pc:
433           case dwarf::DW_LNS_set_prologue_end:
434           case dwarf::DW_LNS_set_epilogue_begin:
435             break;
436 
437           case dwarf::DW_LNS_advance_pc:
438           case dwarf::DW_LNS_set_file:
439           case dwarf::DW_LNS_set_column:
440           case dwarf::DW_LNS_set_isa:
441             NewOp.Data = LineData.getULEB128(&Offset);
442             break;
443 
444           case dwarf::DW_LNS_advance_line:
445             NewOp.SData = LineData.getSLEB128(&Offset);
446             break;
447 
448           case dwarf::DW_LNS_fixed_advance_pc:
449             NewOp.Data = LineData.getU16(&Offset);
450             break;
451 
452           default:
453             for (uint8_t i = 0;
454                  i < (*DebugLines.StandardOpcodeLengths)[NewOp.Opcode - 1]; ++i)
455               NewOp.StandardOpcodeData.push_back(LineData.getULEB128(&Offset));
456           }
457         }
458         DebugLines.Opcodes.push_back(NewOp);
459       }
460       Y.DebugLines.push_back(DebugLines);
461     }
462   }
463 }
464