1 /*
2     Copyright (C) 2013-2014 Volker Krause <vkrause@kde.org>
3 
4     This program is free software; you can redistribute it and/or modify it
5     under the terms of the GNU Library General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or (at your
7     option) any later version.
8 
9     This program is distributed in the hope that it will be useful, but WITHOUT
10     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
12     License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <https://www.gnu.org/licenses/>.
16 */
17 
18 #include "elfdefinitions.h" // must come first before <elf.h>
19 #define _ELF_H_
20 #include "disassembler.h"
21 #include "config-elf-dissector.h"
22 
23 #include <elf/elfsymboltableentry.h>
24 #include <elf/elfsymboltablesection.h>
25 #include <elf/elffile.h>
26 #include <elf/elfheader.h>
27 #include <elf/elfpltentry.h>
28 #include <elf/elfpltsection.h>
29 #include <elf/elfgotsection.h>
30 #include <elf/elfrelocationentry.h>
31 #include <dwarf/dwarfinfo.h>
32 #include <dwarf/dwarfaddressranges.h>
33 #include <dwarf/dwarfcudie.h>
34 #include <dwarf/dwarfline.h>
35 
36 #include <QDebug>
37 #include <QString>
38 #include <QUrl>
39 
40 #include <cassert>
41 #include <cstdarg>
42 
43 #include <ansidecl.h>
44 
45 #define PACKAGE "elf-dissector"
46 #define PACKAGE_VERSION "0.0.1"
47 #include <dis-asm.h>
48 #include <elf.h>
49 #include <stdio.h>
50 
51 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 29)
52     // in binutils 2.29 print_insn_i386 disappeared from the dis-asm.h header,
53     // not sure what the proper replacement for it is, so define it here
54     extern "C" int print_insn_i386 (bfd_vma, disassemble_info *);
55     extern "C" int print_insn_big_arm(bfd_vma, disassemble_info *);
56     extern "C" int print_insn_little_arm(bfd_vma, disassemble_info *);
57 #endif
58 
59 #ifdef HAVE_CAPSTONE
60 #include <capstone.h>
61 #endif
62 
qstring_printf(void * data,const char * format,...)63 static int qstring_printf(void *data, const char *format, ...)
64 {
65     QString buffer;
66     va_list args;
67     va_start(args, format);
68     buffer.vsprintf(format, args);
69     va_end(args);
70 
71     QString *s = static_cast<QString*>(data);
72     s->append(buffer);
73     return buffer.size();
74 }
75 
print_address(bfd_vma addr,struct disassemble_info * info)76 static void print_address(bfd_vma addr, struct disassemble_info *info)
77 {
78     const auto disasm = static_cast<Disassembler*>(info->application_data);
79     assert(disasm);
80 
81     // TODO handle relocations/PLT/etc
82 
83     (*info->fprintf_func) (info->stream, "0x%lx", addr);
84     auto s = static_cast<QString*>(info->stream);
85 
86     const uint64_t targetAddr = disasm->baseAddress() + addr;
87     disasm->printAddress(targetAddr, s);
88 }
89 
90 Disassembler::Disassembler() = default;
91 
92 Disassembler::~Disassembler() = default;
93 
disassemble(ElfSection * section)94 QString Disassembler::disassemble(ElfSection* section)
95 {
96     m_file = section->file();
97     m_baseAddress = section->header()->virtualAddress();
98     return disassemble(section->rawData(), section->size());
99 }
100 
disassemble(ElfSymbolTableEntry * entry)101 QString Disassembler::disassemble(ElfSymbolTableEntry* entry)
102 {
103     m_file = entry->symbolTable()->file();
104     m_baseAddress = entry->value();
105     return disassemble(entry->data(), entry->size());
106 
107 }
108 
disassemble(ElfPltEntry * entry)109 QString Disassembler::disassemble(ElfPltEntry* entry)
110 {
111     m_file = entry->section()->file();
112     m_baseAddress = entry->section()->header()->virtualAddress() + entry->index() * entry->size();
113     return disassemble(entry->rawData(), entry->size());
114 }
115 
disassemble(const unsigned char * data,uint64_t size)116 QString Disassembler::disassemble(const unsigned char* data, uint64_t size)
117 {
118 #if defined(__x86_64__) || defined(__i386__)
119     if (file()->header()->machine() == EM_386 || file()->header()->machine() == EM_X86_64) {
120         return disassembleBinutils(data, size);
121     }
122 #endif
123 
124     return disassembleCapstone(data, size);
125 }
126 
disassembleBinutils(const unsigned char * data,uint64_t size)127 QString Disassembler::disassembleBinutils(const unsigned char* data, uint64_t size)
128 {
129     QString result;
130     disassembler_ftype disassemble_fn;
131     disassemble_info info;
132     INIT_DISASSEMBLE_INFO(info, &result, qstring_printf);
133 
134     info.application_data = this;
135     info.flavour = bfd_target_elf_flavour;
136     info.endian = m_file->byteOrder() == ELFDATA2LSB ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG;
137     switch (m_file->header()->machine()) {
138 #if defined(__x86_64__) || defined(__i386__)
139         case EM_386:
140             info.arch = bfd_arch_i386;
141             info.mach = bfd_mach_i386_i386;
142             disassemble_fn = print_insn_i386;
143             break;
144         case EM_X86_64:
145             info.arch = bfd_arch_i386;
146             info.mach = bfd_mach_x86_64;
147             disassemble_fn = print_insn_i386;
148             break;
149 #endif
150 #if defined(__arm__)
151         case EM_ARM:
152             info.arch = bfd_arch_arm;
153             info.mach = bfd_mach_arm_unknown;
154             if (info.endian == BFD_ENDIAN_LITTLE)
155                 disassemble_fn = print_insn_little_arm;
156             else
157                 disassemble_fn = print_insn_big_arm;
158             break;
159 #endif
160         default:
161             qWarning() << "Unsupported architecture!";
162             return {};
163     }
164 
165     info.buffer = const_cast<bfd_byte*>(data);
166     info.buffer_length = size;
167     info.buffer_vma = 0;
168     info.print_address_func = print_address;
169 
170     uint32_t bytes = 0;
171     while (bytes < size) {
172         auto line = lineForAddress(baseAddress() + bytes);
173         if (!line.isNull())
174             result += printSourceLine(line) + "<br/>";
175         result += QStringLiteral("%1: ").arg(bytes, 8, 10);
176         bytes += (*disassemble_fn)(bytes, &info);
177         result += QLatin1String("<br/>");
178     }
179 
180     return result;
181 }
182 
183 #ifdef HAVE_CAPSTONE
isInsnGroup(cs_insn * insn,uint8_t group)184 static bool isInsnGroup(cs_insn *insn, uint8_t group)
185 {
186     for (uint8_t i = 0; i < insn->detail->groups_count; ++i) {
187         if (insn->detail->groups[i] == group)
188             return true;
189     }
190     return false;
191 }
192 #endif
193 
disassembleCapstone(const unsigned char * data,uint64_t size)194 QString Disassembler::disassembleCapstone(const unsigned char* data, uint64_t size)
195 {
196 #ifdef HAVE_CAPSTONE
197     csh handle;
198     cs_err err;
199     switch (file()->header()->machine()) {
200         case EM_386:
201             err = cs_open(CS_ARCH_X86, CS_MODE_32, &handle);
202             break;
203         case EM_X86_64:
204             err = cs_open(CS_ARCH_X86, CS_MODE_64, &handle);
205             break;
206         case EM_ARM:
207             err = cs_open(CS_ARCH_ARM, CS_MODE_LITTLE_ENDIAN, &handle);
208             break;
209         case EM_AARCH64:
210             err = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &handle);
211             break;
212         default:
213             qWarning() << "Unsupported architecture!";
214             return {};
215     }
216     if (err != CS_ERR_OK) {
217         qWarning() << "Error opening Capstone handle:" << err;
218         return {};
219     }
220     std::unique_ptr<csh, decltype(&cs_close)> handleGuard(&handle, &cs_close);
221     cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
222     cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
223 
224     cs_insn *insn = cs_malloc(handle);
225     const auto insnFree = [](cs_insn *insn) { cs_free(insn, 1); };
226     std::unique_ptr<cs_insn, decltype(insnFree)> insnGuard(insn, insnFree);
227 
228     auto address = baseAddress();
229     QString result;
230 
231     size_t cs_size = size; // force to size_t for 32bit host support
232     while (cs_size > 0) {
233         if (!cs_disasm_iter(handle, &data, &cs_size, &address, insn)) {
234             return result;
235         }
236 
237         const auto line = lineForAddress(insn->address);
238         if (!line.isNull())
239             result += printSourceLine(line) + "<br/>";
240 
241         result += QString::number(insn->address - baseAddress()) + ": " + insn->mnemonic + QLatin1Char(' ') + insn->op_str;
242         switch (file()->header()->machine()) {
243             case EM_386:
244             case EM_X86_64:
245                 for (int i = 0; i < insn->detail->x86.op_count; ++i) {
246                     const auto op = insn->detail->x86.operands[i];
247                     if (op.type == X86_OP_MEM) {
248                         result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16);
249                         printAddress(op.mem.disp + address, &result);
250                     } else if (op.type == X86_OP_IMM) {
251                         result += QLatin1String(" # 0x") + QString::number(op.imm, 16);
252                         printAddress(op.imm, &result);
253                     }
254                 }
255                 break;
256             case EM_AARCH64:
257                 for (int i = 0; i < insn->detail->arm64.op_count; ++i) {
258                     const auto op = insn->detail->arm64.operands[i];
259                     if (op.type == ARM64_OP_MEM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP))) {
260                         result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16);
261                         printAddress(op.mem.disp + address, &result);
262                     } else if (op.type == ARM64_OP_IMM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP) || insn->id == ARM64_INS_ADRP)) {
263                         result += QLatin1String(" # 0x") + QString::number(op.imm, 16);
264                         printAddress(op.imm, &result);
265                     }
266                 }
267                 break;
268             default:
269                 break;
270         }
271         result += "<br/>";
272     }
273 
274     return result;
275 #else
276     return {};
277 #endif
278 }
279 
file() const280 ElfFile* Disassembler::file() const
281 {
282     return m_file;
283 }
284 
baseAddress() const285 uint64_t Disassembler::baseAddress() const
286 {
287     return m_baseAddress;
288 }
289 
printAddress(uint64_t addr,QString * s) const290 void Disassembler::printAddress(uint64_t addr, QString *s) const
291 {
292     if (auto symbolTable = file()->symbolTable()) {
293         const auto target = symbolTable->entryContainingValue(addr);
294         if (target) {
295             s->append(" (");
296             s->append(printSymbol(target));
297             if (target->value() < addr) {
298                 s->append(QLatin1String("+0x") + QString::number(addr - target->value(), 16));
299             }
300             s->append(')');
301             return;
302         }
303     }
304 
305     const auto secIdx = file()->indexOfSectionWithVirtualAddress(addr);
306     if (secIdx < 0)
307         return;
308 
309     const auto section = file()->section<ElfSection>(secIdx);
310     assert(section);
311 
312     const auto pltSection = dynamic_cast<ElfPltSection*>(section);
313     if (pltSection) {
314         const auto pltEntry = pltSection->entry((addr - section->header()->virtualAddress()) / section->header()->entrySize());
315         assert(pltEntry);
316         s->append(" (");
317         s->append(printPltEntry(pltEntry));
318         s->append(')');
319         return;
320     }
321 
322     const auto gotSection = dynamic_cast<ElfGotSection*>(section);
323     if (gotSection) {
324         const auto gotEntry = gotSection->entry((addr - section->header()->virtualAddress()) / file()->addressSize());
325         assert(gotEntry);
326         s->append(" (");
327         s->append(printGotEntry(gotEntry));
328         s->append(')');
329         return;
330     }
331 
332     s->append(QLatin1String(" (") + section->header()->name() + QLatin1String(" + 0x") + QString::number(addr - section->header()->virtualAddress(), 16) + QLatin1Char(')'));
333 }
334 
printSymbol(ElfSymbolTableEntry * entry) const335 QString Disassembler::printSymbol(ElfSymbolTableEntry* entry) const
336 {
337     return QLatin1String(entry->name());
338 }
339 
printGotEntry(ElfGotEntry * entry) const340 QString Disassembler::printGotEntry(ElfGotEntry* entry) const
341 {
342     const auto reloc = entry->relocation();
343     const auto sym = reloc ? reloc->symbol() : nullptr;
344     if (sym)
345         return sym->name() + QStringLiteral("@got");
346     return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->file()->addressSize());
347 }
348 
printPltEntry(ElfPltEntry * entry) const349 QString Disassembler::printPltEntry(ElfPltEntry* entry) const
350 {
351     const auto gotEntry = entry->gotEntry();
352     const auto reloc = gotEntry ? gotEntry->relocation() : nullptr;
353     const auto sym = reloc ? reloc->symbol() : nullptr;
354     if (sym)
355         return sym->name() + QStringLiteral("@plt");
356     return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->header()->entrySize());
357 }
358 
lineForAddress(uint64_t addr) const359 DwarfLine Disassembler::lineForAddress(uint64_t addr) const
360 {
361     if (!file()->dwarfInfo())
362         return {};
363 
364     auto cu = file()->dwarfInfo()->compilationUnitForAddress(addr);
365     if (!cu)
366         return {};
367     return cu->lineForAddress(addr);
368 }
369 
printSourceLine(DwarfLine line) const370 QString Disassembler::printSourceLine(DwarfLine line) const
371 {
372     assert(!line.isNull());
373     auto cu = file()->dwarfInfo()->compilationUnitForAddress(line.address());
374     assert(cu);
375 
376     QUrl url;
377     url.setScheme(QStringLiteral("code"));
378     url.setPath(cu->sourceFileForLine(line));
379     url.setFragment(QString::number(line.line()));
380 
381     QString s;
382     s += "<i>Source: <a href=\"" + url.toEncoded() + "\">" + cu->sourceFileForLine(line);
383     s += ':' + QString::number(line.line()) + "</a></i>";
384     return s;
385 }
386