1 /*
2 Copyright (C) 2013-2014 Volker Krause <vkrause@kde.org>
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU Library General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18 #include "elfdefinitions.h" // must come first before <elf.h>
19 #define _ELF_H_
20 #include "disassembler.h"
21 #include "config-elf-dissector.h"
22
23 #include <elf/elfsymboltableentry.h>
24 #include <elf/elfsymboltablesection.h>
25 #include <elf/elffile.h>
26 #include <elf/elfheader.h>
27 #include <elf/elfpltentry.h>
28 #include <elf/elfpltsection.h>
29 #include <elf/elfgotsection.h>
30 #include <elf/elfrelocationentry.h>
31 #include <dwarf/dwarfinfo.h>
32 #include <dwarf/dwarfaddressranges.h>
33 #include <dwarf/dwarfcudie.h>
34 #include <dwarf/dwarfline.h>
35
36 #include <QDebug>
37 #include <QString>
38 #include <QUrl>
39
40 #include <cassert>
41 #include <cstdarg>
42
43 #include <ansidecl.h>
44
45 #define PACKAGE "elf-dissector"
46 #define PACKAGE_VERSION "0.0.1"
47 #include <dis-asm.h>
48 #include <elf.h>
49 #include <stdio.h>
50
51 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 29)
52 // in binutils 2.29 print_insn_i386 disappeared from the dis-asm.h header,
53 // not sure what the proper replacement for it is, so define it here
54 extern "C" int print_insn_i386 (bfd_vma, disassemble_info *);
55 extern "C" int print_insn_big_arm(bfd_vma, disassemble_info *);
56 extern "C" int print_insn_little_arm(bfd_vma, disassemble_info *);
57 #endif
58
59 #ifdef HAVE_CAPSTONE
60 #include <capstone.h>
61 #endif
62
qstring_printf(void * data,const char * format,...)63 static int qstring_printf(void *data, const char *format, ...)
64 {
65 QString buffer;
66 va_list args;
67 va_start(args, format);
68 buffer.vsprintf(format, args);
69 va_end(args);
70
71 QString *s = static_cast<QString*>(data);
72 s->append(buffer);
73 return buffer.size();
74 }
75
print_address(bfd_vma addr,struct disassemble_info * info)76 static void print_address(bfd_vma addr, struct disassemble_info *info)
77 {
78 const auto disasm = static_cast<Disassembler*>(info->application_data);
79 assert(disasm);
80
81 // TODO handle relocations/PLT/etc
82
83 (*info->fprintf_func) (info->stream, "0x%lx", addr);
84 auto s = static_cast<QString*>(info->stream);
85
86 const uint64_t targetAddr = disasm->baseAddress() + addr;
87 disasm->printAddress(targetAddr, s);
88 }
89
// Constructor and destructor are defaulted here, out of line; how the members
// are initialized is determined by the class declaration in disassembler.h.
Disassembler::Disassembler() = default;

Disassembler::~Disassembler() = default;
93
disassemble(ElfSection * section)94 QString Disassembler::disassemble(ElfSection* section)
95 {
96 m_file = section->file();
97 m_baseAddress = section->header()->virtualAddress();
98 return disassemble(section->rawData(), section->size());
99 }
100
disassemble(ElfSymbolTableEntry * entry)101 QString Disassembler::disassemble(ElfSymbolTableEntry* entry)
102 {
103 m_file = entry->symbolTable()->file();
104 m_baseAddress = entry->value();
105 return disassemble(entry->data(), entry->size());
106
107 }
108
disassemble(ElfPltEntry * entry)109 QString Disassembler::disassemble(ElfPltEntry* entry)
110 {
111 m_file = entry->section()->file();
112 m_baseAddress = entry->section()->header()->virtualAddress() + entry->index() * entry->size();
113 return disassemble(entry->rawData(), entry->size());
114 }
115
disassemble(const unsigned char * data,uint64_t size)116 QString Disassembler::disassemble(const unsigned char* data, uint64_t size)
117 {
118 #if defined(__x86_64__) || defined(__i386__)
119 if (file()->header()->machine() == EM_386 || file()->header()->machine() == EM_X86_64) {
120 return disassembleBinutils(data, size);
121 }
122 #endif
123
124 return disassembleCapstone(data, size);
125 }
126
disassembleBinutils(const unsigned char * data,uint64_t size)127 QString Disassembler::disassembleBinutils(const unsigned char* data, uint64_t size)
128 {
129 QString result;
130 disassembler_ftype disassemble_fn;
131 disassemble_info info;
132 INIT_DISASSEMBLE_INFO(info, &result, qstring_printf);
133
134 info.application_data = this;
135 info.flavour = bfd_target_elf_flavour;
136 info.endian = m_file->byteOrder() == ELFDATA2LSB ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG;
137 switch (m_file->header()->machine()) {
138 #if defined(__x86_64__) || defined(__i386__)
139 case EM_386:
140 info.arch = bfd_arch_i386;
141 info.mach = bfd_mach_i386_i386;
142 disassemble_fn = print_insn_i386;
143 break;
144 case EM_X86_64:
145 info.arch = bfd_arch_i386;
146 info.mach = bfd_mach_x86_64;
147 disassemble_fn = print_insn_i386;
148 break;
149 #endif
150 #if defined(__arm__)
151 case EM_ARM:
152 info.arch = bfd_arch_arm;
153 info.mach = bfd_mach_arm_unknown;
154 if (info.endian == BFD_ENDIAN_LITTLE)
155 disassemble_fn = print_insn_little_arm;
156 else
157 disassemble_fn = print_insn_big_arm;
158 break;
159 #endif
160 default:
161 qWarning() << "Unsupported architecture!";
162 return {};
163 }
164
165 info.buffer = const_cast<bfd_byte*>(data);
166 info.buffer_length = size;
167 info.buffer_vma = 0;
168 info.print_address_func = print_address;
169
170 uint32_t bytes = 0;
171 while (bytes < size) {
172 auto line = lineForAddress(baseAddress() + bytes);
173 if (!line.isNull())
174 result += printSourceLine(line) + "<br/>";
175 result += QStringLiteral("%1: ").arg(bytes, 8, 10);
176 bytes += (*disassemble_fn)(bytes, &info);
177 result += QLatin1String("<br/>");
178 }
179
180 return result;
181 }
182
183 #ifdef HAVE_CAPSTONE
isInsnGroup(cs_insn * insn,uint8_t group)184 static bool isInsnGroup(cs_insn *insn, uint8_t group)
185 {
186 for (uint8_t i = 0; i < insn->detail->groups_count; ++i) {
187 if (insn->detail->groups[i] == group)
188 return true;
189 }
190 return false;
191 }
192 #endif
193
disassembleCapstone(const unsigned char * data,uint64_t size)194 QString Disassembler::disassembleCapstone(const unsigned char* data, uint64_t size)
195 {
196 #ifdef HAVE_CAPSTONE
197 csh handle;
198 cs_err err;
199 switch (file()->header()->machine()) {
200 case EM_386:
201 err = cs_open(CS_ARCH_X86, CS_MODE_32, &handle);
202 break;
203 case EM_X86_64:
204 err = cs_open(CS_ARCH_X86, CS_MODE_64, &handle);
205 break;
206 case EM_ARM:
207 err = cs_open(CS_ARCH_ARM, CS_MODE_LITTLE_ENDIAN, &handle);
208 break;
209 case EM_AARCH64:
210 err = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &handle);
211 break;
212 default:
213 qWarning() << "Unsupported architecture!";
214 return {};
215 }
216 if (err != CS_ERR_OK) {
217 qWarning() << "Error opening Capstone handle:" << err;
218 return {};
219 }
220 std::unique_ptr<csh, decltype(&cs_close)> handleGuard(&handle, &cs_close);
221 cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
222 cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
223
224 cs_insn *insn = cs_malloc(handle);
225 const auto insnFree = [](cs_insn *insn) { cs_free(insn, 1); };
226 std::unique_ptr<cs_insn, decltype(insnFree)> insnGuard(insn, insnFree);
227
228 auto address = baseAddress();
229 QString result;
230
231 size_t cs_size = size; // force to size_t for 32bit host support
232 while (cs_size > 0) {
233 if (!cs_disasm_iter(handle, &data, &cs_size, &address, insn)) {
234 return result;
235 }
236
237 const auto line = lineForAddress(insn->address);
238 if (!line.isNull())
239 result += printSourceLine(line) + "<br/>";
240
241 result += QString::number(insn->address - baseAddress()) + ": " + insn->mnemonic + QLatin1Char(' ') + insn->op_str;
242 switch (file()->header()->machine()) {
243 case EM_386:
244 case EM_X86_64:
245 for (int i = 0; i < insn->detail->x86.op_count; ++i) {
246 const auto op = insn->detail->x86.operands[i];
247 if (op.type == X86_OP_MEM) {
248 result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16);
249 printAddress(op.mem.disp + address, &result);
250 } else if (op.type == X86_OP_IMM) {
251 result += QLatin1String(" # 0x") + QString::number(op.imm, 16);
252 printAddress(op.imm, &result);
253 }
254 }
255 break;
256 case EM_AARCH64:
257 for (int i = 0; i < insn->detail->arm64.op_count; ++i) {
258 const auto op = insn->detail->arm64.operands[i];
259 if (op.type == ARM64_OP_MEM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP))) {
260 result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16);
261 printAddress(op.mem.disp + address, &result);
262 } else if (op.type == ARM64_OP_IMM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP) || insn->id == ARM64_INS_ADRP)) {
263 result += QLatin1String(" # 0x") + QString::number(op.imm, 16);
264 printAddress(op.imm, &result);
265 }
266 }
267 break;
268 default:
269 break;
270 }
271 result += "<br/>";
272 }
273
274 return result;
275 #else
276 return {};
277 #endif
278 }
279
file() const280 ElfFile* Disassembler::file() const
281 {
282 return m_file;
283 }
284
baseAddress() const285 uint64_t Disassembler::baseAddress() const
286 {
287 return m_baseAddress;
288 }
289
printAddress(uint64_t addr,QString * s) const290 void Disassembler::printAddress(uint64_t addr, QString *s) const
291 {
292 if (auto symbolTable = file()->symbolTable()) {
293 const auto target = symbolTable->entryContainingValue(addr);
294 if (target) {
295 s->append(" (");
296 s->append(printSymbol(target));
297 if (target->value() < addr) {
298 s->append(QLatin1String("+0x") + QString::number(addr - target->value(), 16));
299 }
300 s->append(')');
301 return;
302 }
303 }
304
305 const auto secIdx = file()->indexOfSectionWithVirtualAddress(addr);
306 if (secIdx < 0)
307 return;
308
309 const auto section = file()->section<ElfSection>(secIdx);
310 assert(section);
311
312 const auto pltSection = dynamic_cast<ElfPltSection*>(section);
313 if (pltSection) {
314 const auto pltEntry = pltSection->entry((addr - section->header()->virtualAddress()) / section->header()->entrySize());
315 assert(pltEntry);
316 s->append(" (");
317 s->append(printPltEntry(pltEntry));
318 s->append(')');
319 return;
320 }
321
322 const auto gotSection = dynamic_cast<ElfGotSection*>(section);
323 if (gotSection) {
324 const auto gotEntry = gotSection->entry((addr - section->header()->virtualAddress()) / file()->addressSize());
325 assert(gotEntry);
326 s->append(" (");
327 s->append(printGotEntry(gotEntry));
328 s->append(')');
329 return;
330 }
331
332 s->append(QLatin1String(" (") + section->header()->name() + QLatin1String(" + 0x") + QString::number(addr - section->header()->virtualAddress(), 16) + QLatin1Char(')'));
333 }
334
printSymbol(ElfSymbolTableEntry * entry) const335 QString Disassembler::printSymbol(ElfSymbolTableEntry* entry) const
336 {
337 return QLatin1String(entry->name());
338 }
339
printGotEntry(ElfGotEntry * entry) const340 QString Disassembler::printGotEntry(ElfGotEntry* entry) const
341 {
342 const auto reloc = entry->relocation();
343 const auto sym = reloc ? reloc->symbol() : nullptr;
344 if (sym)
345 return sym->name() + QStringLiteral("@got");
346 return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->file()->addressSize());
347 }
348
printPltEntry(ElfPltEntry * entry) const349 QString Disassembler::printPltEntry(ElfPltEntry* entry) const
350 {
351 const auto gotEntry = entry->gotEntry();
352 const auto reloc = gotEntry ? gotEntry->relocation() : nullptr;
353 const auto sym = reloc ? reloc->symbol() : nullptr;
354 if (sym)
355 return sym->name() + QStringLiteral("@plt");
356 return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->header()->entrySize());
357 }
358
lineForAddress(uint64_t addr) const359 DwarfLine Disassembler::lineForAddress(uint64_t addr) const
360 {
361 if (!file()->dwarfInfo())
362 return {};
363
364 auto cu = file()->dwarfInfo()->compilationUnitForAddress(addr);
365 if (!cu)
366 return {};
367 return cu->lineForAddress(addr);
368 }
369
printSourceLine(DwarfLine line) const370 QString Disassembler::printSourceLine(DwarfLine line) const
371 {
372 assert(!line.isNull());
373 auto cu = file()->dwarfInfo()->compilationUnitForAddress(line.address());
374 assert(cu);
375
376 QUrl url;
377 url.setScheme(QStringLiteral("code"));
378 url.setPath(cu->sourceFileForLine(line));
379 url.setFragment(QString::number(line.line()));
380
381 QString s;
382 s += "<i>Source: <a href=\"" + url.toEncoded() + "\">" + cu->sourceFileForLine(line);
383 s += ':' + QString::number(line.line()) + "</a></i>";
384 return s;
385 }
386