1 //===------ dwarf2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Error.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
12 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
13 #include "llvm/ObjectYAML/DWARFYAML.h"
14 
15 #include <algorithm>
16 
17 using namespace llvm;
18 
dumpInitialLength(DataExtractor & Data,uint64_t & Offset,DWARFYAML::InitialLength & InitialLength)19 void dumpInitialLength(DataExtractor &Data, uint64_t &Offset,
20                        DWARFYAML::InitialLength &InitialLength) {
21   InitialLength.TotalLength = Data.getU32(&Offset);
22   if (InitialLength.isDWARF64())
23     InitialLength.TotalLength64 = Data.getU64(&Offset);
24 }
25 
dumpDebugAbbrev(DWARFContext & DCtx,DWARFYAML::Data & Y)26 void dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
27   auto AbbrevSetPtr = DCtx.getDebugAbbrev();
28   if (AbbrevSetPtr) {
29     for (auto AbbrvDeclSet : *AbbrevSetPtr) {
30       auto ListOffset = AbbrvDeclSet.second.getOffset();
31       for (auto AbbrvDecl : AbbrvDeclSet.second) {
32         DWARFYAML::Abbrev Abbrv;
33         Abbrv.Code = AbbrvDecl.getCode();
34         Abbrv.Tag = AbbrvDecl.getTag();
35         Abbrv.Children = AbbrvDecl.hasChildren() ? dwarf::DW_CHILDREN_yes
36                                                  : dwarf::DW_CHILDREN_no;
37         for (auto Attribute : AbbrvDecl.attributes()) {
38           DWARFYAML::AttributeAbbrev AttAbrv;
39           AttAbrv.Attribute = Attribute.Attr;
40           AttAbrv.Form = Attribute.Form;
41           if (AttAbrv.Form == dwarf::DW_FORM_implicit_const)
42             AttAbrv.Value = Attribute.getImplicitConstValue();
43           Abbrv.Attributes.push_back(AttAbrv);
44         }
45         Abbrv.ListOffset = ListOffset;
46         Y.AbbrevDecls.push_back(Abbrv);
47       }
48       // XXX BINARYEN: null-terminate the DeclSet. This is needed to separate
49       // DeclSets from each other, and to null-terminate the entire list
50       // (LLVM works with or without this, but other decoders may error, see
51       //  https://bugs.llvm.org/show_bug.cgi?id=44511).
52       DWARFYAML::Abbrev Abbrv;
53       Abbrv.Code = 0;
54       Abbrv.Tag = dwarf::Tag(0);
55       Y.AbbrevDecls.push_back(Abbrv);
56     }
57   }
58 }
59 
dumpDebugStrings(DWARFContext & DCtx,DWARFYAML::Data & Y)60 void dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
61   StringRef RemainingTable = DCtx.getDWARFObj().getStrSection();
62   while (RemainingTable.size() > 0) {
63     auto SymbolPair = RemainingTable.split('\0');
64     RemainingTable = SymbolPair.second;
65     Y.DebugStrings.push_back(SymbolPair.first);
66   }
67 }
68 
dumpDebugARanges(DWARFContext & DCtx,DWARFYAML::Data & Y)69 void dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
70   DataExtractor ArangesData(DCtx.getDWARFObj().getArangesSection(),
71                             DCtx.isLittleEndian(), 0);
72   uint64_t Offset = 0;
73   DWARFDebugArangeSet Set;
74 
75   while (Set.extract(ArangesData, &Offset)) {
76     DWARFYAML::ARange Range;
77     Range.Length.setLength(Set.getHeader().Length);
78     Range.Version = Set.getHeader().Version;
79     Range.CuOffset = Set.getHeader().CuOffset;
80     Range.AddrSize = Set.getHeader().AddrSize;
81     Range.SegSize = Set.getHeader().SegSize;
82     for (auto Descriptor : Set.descriptors()) {
83       DWARFYAML::ARangeDescriptor Desc;
84       Desc.Address = Descriptor.Address;
85       Desc.Length = Descriptor.Length;
86       Range.Descriptors.push_back(Desc);
87     }
88     Y.ARanges.push_back(Range);
89   }
90 }
91 
dumpDebugRanges(DWARFContext & DCtx,DWARFYAML::Data & Y)92 void dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { // XXX BINARYEN
93   uint8_t savedAddressByteSize = 4;
94   DWARFDataExtractor rangesData(DCtx.getDWARFObj(), DCtx.getDWARFObj().getRangesSection(),
95                                 DCtx.isLittleEndian(), savedAddressByteSize);
96   uint64_t offset = 0;
97   DWARFDebugRangeList rangeList;
98   while (rangesData.isValidOffset(offset)) {
99     if (Error E = rangeList.extract(rangesData, &offset)) {
100       errs() << toString(std::move(E)) << '\n';
101       break;
102     }
103     for (auto& entry : rangeList.getEntries()) {
104       DWARFYAML::Range range;
105       range.Start = entry.StartAddress;
106       range.End = entry.EndAddress;
107       range.SectionIndex = entry.SectionIndex;
108       Y.Ranges.push_back(range);
109     }
110     DWARFYAML::Range range;
111     range.Start = 0;
112     range.End = 0;
113     range.SectionIndex = -1;
114     Y.Ranges.push_back(range);
115   }
116 }
117 
dumpDebugLoc(DWARFContext & DCtx,DWARFYAML::Data & Y)118 void dumpDebugLoc(DWARFContext &DCtx, DWARFYAML::Data &Y) { // XXX BINARYEN
119   uint8_t savedAddressByteSize = 4;
120   DWARFDataExtractor locsData(DCtx.getDWARFObj(), DCtx.getDWARFObj().getLocSection(),
121                               DCtx.isLittleEndian(), savedAddressByteSize);
122   uint64_t offset = 0;
123   DWARFDebugLoc locList;
124   while (locsData.isValidOffset(offset)) {
125     uint64_t locListOffset = offset; // XXX BINARYEN
126     auto list = locList.parseOneLocationList(locsData, &offset);
127     if (!list) {
128       errs() << "debug_loc error\n";
129       break;
130     }
131     for (auto& entry : list.get().Entries) {
132       DWARFYAML::Loc loc;
133       loc.Start = entry.Begin;
134       loc.End = entry.End;
135       for (auto x : entry.Loc) {
136         loc.Location.push_back(x);
137       }
138       loc.CompileUnitOffset = locListOffset; // XXX BINARYEN
139       Y.Locs.push_back(loc);
140     }
141     DWARFYAML::Loc loc;
142     loc.Start = 0;
143     loc.End = 0;
144     loc.CompileUnitOffset = locListOffset; // XXX BINARYEN
145     Y.Locs.push_back(loc);
146   }
147 }
148 
dumpPubSection(DWARFContext & DCtx,DWARFYAML::PubSection & Y,DWARFSection Section)149 void dumpPubSection(DWARFContext &DCtx, DWARFYAML::PubSection &Y,
150                     DWARFSection Section) {
151   DWARFDataExtractor PubSectionData(DCtx.getDWARFObj(), Section,
152                                     DCtx.isLittleEndian(), 0);
153   uint64_t Offset = 0;
154   dumpInitialLength(PubSectionData, Offset, Y.Length);
155   Y.Version = PubSectionData.getU16(&Offset);
156   Y.UnitOffset = PubSectionData.getU32(&Offset);
157   Y.UnitSize = PubSectionData.getU32(&Offset);
158   while (Offset < Y.Length.getLength()) {
159     DWARFYAML::PubEntry NewEntry;
160     NewEntry.DieOffset = PubSectionData.getU32(&Offset);
161     if (Y.IsGNUStyle)
162       NewEntry.Descriptor = PubSectionData.getU8(&Offset);
163     NewEntry.Name = PubSectionData.getCStr(&Offset);
164     Y.Entries.push_back(NewEntry);
165   }
166 }
167 
dumpDebugPubSections(DWARFContext & DCtx,DWARFYAML::Data & Y)168 void dumpDebugPubSections(DWARFContext &DCtx, DWARFYAML::Data &Y) {
169   const DWARFObject &D = DCtx.getDWARFObj();
170   Y.PubNames.IsGNUStyle = false;
171   dumpPubSection(DCtx, Y.PubNames, D.getPubnamesSection());
172 
173   Y.PubTypes.IsGNUStyle = false;
174   dumpPubSection(DCtx, Y.PubTypes, D.getPubtypesSection());
175 
176   Y.GNUPubNames.IsGNUStyle = true;
177   dumpPubSection(DCtx, Y.GNUPubNames, D.getGnuPubnamesSection());
178 
179   Y.GNUPubTypes.IsGNUStyle = true;
180   dumpPubSection(DCtx, Y.GNUPubTypes, D.getGnuPubtypesSection());
181 }
182 
dumpDebugInfo(DWARFContext & DCtx,DWARFYAML::Data & Y)183 void dumpDebugInfo(DWARFContext &DCtx, DWARFYAML::Data &Y) {
184   for (const auto &CU : DCtx.compile_units()) {
185     DWARFYAML::Unit NewUnit;
186     NewUnit.Length.setLength(CU->getLength());
187     NewUnit.Version = CU->getVersion();
188     if(NewUnit.Version >= 5)
189       NewUnit.Type = (dwarf::UnitType)CU->getUnitType();
190     if (auto* Abbreviations = CU->getAbbreviations()) { // XXX BINARYEN
191       NewUnit.AbbrOffset = Abbreviations->getOffset();
192     }
193     NewUnit.AddrSize = CU->getAddressByteSize();
194     for (auto DIE : CU->dies()) {
195       DWARFYAML::Entry NewEntry;
196       DataExtractor EntryData = CU->getDebugInfoExtractor();
197       uint64_t offset = DIE.getOffset();
198 
199       assert(EntryData.isValidOffset(offset) && "Invalid DIE Offset");
200       if (!EntryData.isValidOffset(offset))
201         continue;
202 
203       NewEntry.AbbrCode = EntryData.getULEB128(&offset);
204 
205       auto AbbrevDecl = DIE.getAbbreviationDeclarationPtr();
206       if (AbbrevDecl) {
207         for (const auto &AttrSpec : AbbrevDecl->attributes()) {
208           DWARFYAML::FormValue NewValue;
209           NewValue.Value = 0xDEADBEEFDEADBEEF;
210           DWARFDie DIEWrapper(CU.get(), &DIE);
211           auto FormValue = DIEWrapper.find(AttrSpec.Attr);
212           if (!FormValue)
213             return;
214           auto Form = FormValue.getValue().getForm();
215           bool indirect = false;
216           do {
217             indirect = false;
218             switch (Form) {
219             case dwarf::DW_FORM_addr:
220             case dwarf::DW_FORM_GNU_addr_index:
221               if (auto Val = FormValue.getValue().getAsAddress())
222                 NewValue.Value = Val.getValue();
223               break;
224             case dwarf::DW_FORM_ref_addr:
225             case dwarf::DW_FORM_ref1:
226             case dwarf::DW_FORM_ref2:
227             case dwarf::DW_FORM_ref4:
228             case dwarf::DW_FORM_ref8:
229             case dwarf::DW_FORM_ref_udata:
230             case dwarf::DW_FORM_ref_sig8:
231               if (auto Val = FormValue.getValue().getAsReferenceUVal())
232                 NewValue.Value = Val.getValue();
233               break;
234             case dwarf::DW_FORM_exprloc:
235             case dwarf::DW_FORM_block:
236             case dwarf::DW_FORM_block1:
237             case dwarf::DW_FORM_block2:
238             case dwarf::DW_FORM_block4:
239               if (auto Val = FormValue.getValue().getAsBlock()) {
240                 auto BlockData = Val.getValue();
241                 std::copy(BlockData.begin(), BlockData.end(),
242                           std::back_inserter(NewValue.BlockData));
243               }
244               NewValue.Value = NewValue.BlockData.size();
245               break;
246             case dwarf::DW_FORM_data1:
247             case dwarf::DW_FORM_flag:
248             case dwarf::DW_FORM_data2:
249             case dwarf::DW_FORM_data4:
250             case dwarf::DW_FORM_data8:
251             case dwarf::DW_FORM_udata:
252             case dwarf::DW_FORM_ref_sup4:
253             case dwarf::DW_FORM_ref_sup8:
254               if (auto Val = FormValue.getValue().getAsUnsignedConstant())
255                 NewValue.Value = Val.getValue();
256               break;
257             // XXX BINARYEN: sdata is signed, and FormValue won't return it as
258             //               unsigned (it returns an empty value).
259             case dwarf::DW_FORM_sdata:
260               if (auto Val = FormValue.getValue().getAsSignedConstant())
261                 NewValue.Value = Val.getValue();
262               break;
263             case dwarf::DW_FORM_string:
264               if (auto Val = FormValue.getValue().getAsCString())
265                 NewValue.CStr = Val.getValue();
266               break;
267             case dwarf::DW_FORM_indirect:
268               indirect = true;
269               if (auto Val = FormValue.getValue().getAsUnsignedConstant()) {
270                 NewValue.Value = Val.getValue();
271                 NewEntry.Values.push_back(NewValue);
272                 Form = static_cast<dwarf::Form>(Val.getValue());
273               }
274               break;
275             case dwarf::DW_FORM_strp:
276             case dwarf::DW_FORM_sec_offset:
277             case dwarf::DW_FORM_GNU_ref_alt:
278             case dwarf::DW_FORM_GNU_strp_alt:
279             case dwarf::DW_FORM_line_strp:
280             case dwarf::DW_FORM_strp_sup:
281             case dwarf::DW_FORM_GNU_str_index:
282             case dwarf::DW_FORM_strx:
283               if (auto Val = FormValue.getValue().getAsCStringOffset())
284                 NewValue.Value = Val.getValue();
285               break;
286             case dwarf::DW_FORM_flag_present:
287               NewValue.Value = 1;
288               break;
289             default:
290               break;
291             }
292           } while (indirect);
293           NewEntry.Values.push_back(NewValue);
294         }
295       }
296 
297       NewUnit.Entries.push_back(NewEntry);
298     }
299     Y.CompileUnits.push_back(NewUnit);
300   }
301 }
302 
dumpFileEntry(DataExtractor & Data,uint64_t & Offset,DWARFYAML::File & File)303 bool dumpFileEntry(DataExtractor &Data, uint64_t &Offset,
304                    DWARFYAML::File &File) {
305   File.Name = Data.getCStr(&Offset);
306   if (File.Name.empty())
307     return false;
308   File.DirIdx = Data.getULEB128(&Offset);
309   File.ModTime = Data.getULEB128(&Offset);
310   File.Length = Data.getULEB128(&Offset);
311   return true;
312 }
313 
dumpDebugLines(DWARFContext & DCtx,DWARFYAML::Data & Y)314 void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
315   for (const auto &CU : DCtx.compile_units()) {
316     auto CUDIE = CU->getUnitDIE();
317     if (!CUDIE)
318       continue;
319     if (auto StmtOffset =
320             dwarf::toSectionOffset(CUDIE.find(dwarf::DW_AT_stmt_list))) {
321       DWARFYAML::LineTable DebugLines;
322       DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
323                              DCtx.isLittleEndian(), CU->getAddressByteSize());
324       uint64_t Offset = *StmtOffset;
325       dumpInitialLength(LineData, Offset, DebugLines.Length);
326       uint64_t LineTableLength = DebugLines.Length.getLength();
327       uint64_t SizeOfPrologueLength = DebugLines.Length.isDWARF64() ? 8 : 4;
328       DebugLines.Version = LineData.getU16(&Offset);
329       DebugLines.PrologueLength =
330           LineData.getUnsigned(&Offset, SizeOfPrologueLength);
331       const uint64_t EndPrologue = DebugLines.PrologueLength + Offset;
332 
333       DebugLines.MinInstLength = LineData.getU8(&Offset);
334       if (DebugLines.Version >= 4)
335         DebugLines.MaxOpsPerInst = LineData.getU8(&Offset);
336       DebugLines.DefaultIsStmt = LineData.getU8(&Offset);
337       DebugLines.LineBase = LineData.getU8(&Offset);
338       DebugLines.LineRange = LineData.getU8(&Offset);
339       DebugLines.OpcodeBase = LineData.getU8(&Offset);
340 
341       DebugLines.StandardOpcodeLengths.reserve(DebugLines.OpcodeBase - 1);
342       for (uint8_t i = 1; i < DebugLines.OpcodeBase; ++i)
343         DebugLines.StandardOpcodeLengths.push_back(LineData.getU8(&Offset));
344 
345       while (Offset < EndPrologue) {
346         StringRef Dir = LineData.getCStr(&Offset);
347         if (!Dir.empty())
348           DebugLines.IncludeDirs.push_back(Dir);
349         else
350           break;
351       }
352 
353       while (Offset < EndPrologue) {
354         DWARFYAML::File TmpFile;
355         if (dumpFileEntry(LineData, Offset, TmpFile))
356           DebugLines.Files.push_back(TmpFile);
357         else
358           break;
359       }
360 
361       const uint64_t LineEnd =
362           LineTableLength + *StmtOffset + SizeOfPrologueLength;
363       while (Offset < LineEnd) {
364         DWARFYAML::LineTableOpcode NewOp = {};
365         NewOp.Opcode = (dwarf::LineNumberOps)LineData.getU8(&Offset);
366         if (NewOp.Opcode == 0) {
367           auto StartExt = Offset;
368           NewOp.ExtLen = LineData.getULEB128(&Offset);
369           NewOp.SubOpcode =
370               (dwarf::LineNumberExtendedOps)LineData.getU8(&Offset);
371           switch (NewOp.SubOpcode) {
372           case dwarf::DW_LNE_set_address:
373           case dwarf::DW_LNE_set_discriminator:
374             NewOp.Data = LineData.getAddress(&Offset);
375             break;
376           case dwarf::DW_LNE_define_file:
377             dumpFileEntry(LineData, Offset, NewOp.FileEntry);
378             break;
379           case dwarf::DW_LNE_end_sequence:
380             break;
381           default:
382             while (Offset < StartExt + NewOp.ExtLen)
383               NewOp.UnknownOpcodeData.push_back(LineData.getU8(&Offset));
384           }
385         } else if (NewOp.Opcode < DebugLines.OpcodeBase) {
386           switch (NewOp.Opcode) {
387           case dwarf::DW_LNS_copy:
388           case dwarf::DW_LNS_negate_stmt:
389           case dwarf::DW_LNS_set_basic_block:
390           case dwarf::DW_LNS_const_add_pc:
391           case dwarf::DW_LNS_set_prologue_end:
392           case dwarf::DW_LNS_set_epilogue_begin:
393             break;
394 
395           case dwarf::DW_LNS_advance_pc:
396           case dwarf::DW_LNS_set_file:
397           case dwarf::DW_LNS_set_column:
398           case dwarf::DW_LNS_set_isa:
399             NewOp.Data = LineData.getULEB128(&Offset);
400             break;
401 
402           case dwarf::DW_LNS_advance_line:
403             NewOp.SData = LineData.getSLEB128(&Offset);
404             break;
405 
406           case dwarf::DW_LNS_fixed_advance_pc:
407             NewOp.Data = LineData.getU16(&Offset);
408             break;
409 
410           default:
411             for (uint8_t i = 0;
412                  i < DebugLines.StandardOpcodeLengths[NewOp.Opcode - 1]; ++i)
413               NewOp.StandardOpcodeData.push_back(LineData.getULEB128(&Offset));
414           }
415         }
416         DebugLines.Opcodes.push_back(NewOp);
417       }
418       Y.DebugLines.push_back(DebugLines);
419     }
420   }
421 }
422 
dwarf2yaml(DWARFContext & DCtx,DWARFYAML::Data & Y)423 std::error_code dwarf2yaml(DWARFContext &DCtx, DWARFYAML::Data &Y) {
424   Y.IsLittleEndian = true; // XXX BINARYEN
425   dumpDebugAbbrev(DCtx, Y);
426   dumpDebugStrings(DCtx, Y);
427   dumpDebugARanges(DCtx, Y);
428   dumpDebugRanges(DCtx, Y); // XXX BINARYEN
429   dumpDebugLoc(DCtx, Y); // XXX BINARYEN
430   dumpDebugPubSections(DCtx, Y);
431   dumpDebugInfo(DCtx, Y);
432   dumpDebugLines(DCtx, Y);
433   return obj2yaml_error::success;
434 }
435