1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/Log.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/BinaryFormat/Magic.h"
23 #include "llvm/BinaryFormat/Wasm.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Format.h"
26
27 using namespace lldb;
28 using namespace lldb_private;
29 using namespace lldb_private::wasm;
30
31 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
32
33 static const uint32_t kWasmHeaderSize =
34 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
35
36 /// Checks whether the data buffer starts with a valid Wasm module header.
ValidateModuleHeader(const DataBufferSP & data_sp)37 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
38 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
39 return false;
40
41 if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
42 llvm::file_magic::wasm_object)
43 return false;
44
45 uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
46
47 uint32_t version = llvm::support::endian::read32le(Ptr);
48 return version == llvm::wasm::WasmVersion;
49 }
50
51 static llvm::Optional<ConstString>
GetWasmString(llvm::DataExtractor & data,llvm::DataExtractor::Cursor & c)52 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
53 // A Wasm string is encoded as a vector of UTF-8 codes.
54 // Vectors are encoded with their u32 length followed by the element
55 // sequence.
56 uint64_t len = data.getULEB128(c);
57 if (!c) {
58 consumeError(c.takeError());
59 return llvm::None;
60 }
61
62 if (len >= (uint64_t(1) << 32)) {
63 return llvm::None;
64 }
65
66 llvm::SmallVector<uint8_t, 32> str_storage;
67 data.getU8(c, str_storage, len);
68 if (!c) {
69 consumeError(c.takeError());
70 return llvm::None;
71 }
72
73 llvm::StringRef str = toStringRef(makeArrayRef(str_storage));
74 return ConstString(str);
75 }
76
77 char ObjectFileWasm::ID;
78
Initialize()79 void ObjectFileWasm::Initialize() {
80 PluginManager::RegisterPlugin(GetPluginNameStatic(),
81 GetPluginDescriptionStatic(), CreateInstance,
82 CreateMemoryInstance, GetModuleSpecifications);
83 }
84
Terminate()85 void ObjectFileWasm::Terminate() {
86 PluginManager::UnregisterPlugin(CreateInstance);
87 }
88
GetPluginNameStatic()89 ConstString ObjectFileWasm::GetPluginNameStatic() {
90 static ConstString g_name("wasm");
91 return g_name;
92 }
93
94 ObjectFile *
CreateInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t file_offset,offset_t length)95 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp,
96 offset_t data_offset, const FileSpec *file,
97 offset_t file_offset, offset_t length) {
98 Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
99
100 if (!data_sp) {
101 data_sp = MapFileData(*file, length, file_offset);
102 if (!data_sp) {
103 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
104 file->GetPath().c_str());
105 return nullptr;
106 }
107 data_offset = 0;
108 }
109
110 assert(data_sp);
111 if (!ValidateModuleHeader(data_sp)) {
112 LLDB_LOGF(log,
113 "Failed to create ObjectFileWasm instance: invalid Wasm header");
114 return nullptr;
115 }
116
117 // Update the data to contain the entire file if it doesn't contain it
118 // already.
119 if (data_sp->GetByteSize() < length) {
120 data_sp = MapFileData(*file, length, file_offset);
121 if (!data_sp) {
122 LLDB_LOGF(log,
123 "Failed to create ObjectFileWasm instance: cannot read file %s",
124 file->GetPath().c_str());
125 return nullptr;
126 }
127 data_offset = 0;
128 }
129
130 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
131 module_sp, data_sp, data_offset, file, file_offset, length));
132 ArchSpec spec = objfile_up->GetArchitecture();
133 if (spec && objfile_up->SetModulesArchitecture(spec)) {
134 LLDB_LOGF(log,
135 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
136 static_cast<void *>(objfile_up.get()),
137 static_cast<void *>(objfile_up->GetModule().get()),
138 objfile_up->GetModule()->GetSpecificationDescription().c_str(),
139 file ? file->GetPath().c_str() : "<NULL>");
140 return objfile_up.release();
141 }
142
143 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
144 return nullptr;
145 }
146
CreateMemoryInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,const ProcessSP & process_sp,addr_t header_addr)147 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
148 DataBufferSP &data_sp,
149 const ProcessSP &process_sp,
150 addr_t header_addr) {
151 if (!ValidateModuleHeader(data_sp))
152 return nullptr;
153
154 std::unique_ptr<ObjectFileWasm> objfile_up(
155 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
156 ArchSpec spec = objfile_up->GetArchitecture();
157 if (spec && objfile_up->SetModulesArchitecture(spec))
158 return objfile_up.release();
159 return nullptr;
160 }
161
DecodeNextSection(lldb::offset_t * offset_ptr)162 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
163 // Buffer sufficient to read a section header and find the pointer to the next
164 // section.
165 const uint32_t kBufferSize = 1024;
166 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
167
168 llvm::DataExtractor data = section_header_data.GetAsLLVM();
169 llvm::DataExtractor::Cursor c(0);
170
171 // Each section consists of:
172 // - a one-byte section id,
173 // - the u32 size of the contents, in bytes,
174 // - the actual contents.
175 uint8_t section_id = data.getU8(c);
176 uint64_t payload_len = data.getULEB128(c);
177 if (!c)
178 return !llvm::errorToBool(c.takeError());
179
180 if (payload_len >= (uint64_t(1) << 32))
181 return false;
182
183 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
184 // Custom sections have the id 0. Their contents consist of a name
185 // identifying the custom section, followed by an uninterpreted sequence
186 // of bytes.
187 lldb::offset_t prev_offset = c.tell();
188 llvm::Optional<ConstString> sect_name = GetWasmString(data, c);
189 if (!sect_name)
190 return false;
191
192 if (payload_len < c.tell() - prev_offset)
193 return false;
194
195 uint32_t section_length = payload_len - (c.tell() - prev_offset);
196 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
197 section_id, *sect_name});
198 *offset_ptr += (c.tell() + section_length);
199 } else if (section_id <= llvm::wasm::WASM_SEC_EVENT) {
200 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
201 static_cast<uint32_t>(payload_len),
202 section_id, ConstString()});
203 *offset_ptr += (c.tell() + payload_len);
204 } else {
205 // Invalid section id.
206 return false;
207 }
208 return true;
209 }
210
DecodeSections()211 bool ObjectFileWasm::DecodeSections() {
212 lldb::offset_t offset = kWasmHeaderSize;
213 if (IsInMemory()) {
214 offset += m_memory_addr;
215 }
216
217 while (DecodeNextSection(&offset))
218 ;
219 return true;
220 }
221
GetModuleSpecifications(const FileSpec & file,DataBufferSP & data_sp,offset_t data_offset,offset_t file_offset,offset_t length,ModuleSpecList & specs)222 size_t ObjectFileWasm::GetModuleSpecifications(
223 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
224 offset_t file_offset, offset_t length, ModuleSpecList &specs) {
225 if (!ValidateModuleHeader(data_sp)) {
226 return 0;
227 }
228
229 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
230 specs.Append(spec);
231 return 1;
232 }
233
ObjectFileWasm(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t offset,offset_t length)234 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp,
235 offset_t data_offset, const FileSpec *file,
236 offset_t offset, offset_t length)
237 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
238 m_arch("wasm32-unknown-unknown-wasm") {
239 m_data.SetAddressByteSize(4);
240 }
241
ObjectFileWasm(const lldb::ModuleSP & module_sp,lldb::DataBufferSP & header_data_sp,const lldb::ProcessSP & process_sp,lldb::addr_t header_addr)242 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
243 lldb::DataBufferSP &header_data_sp,
244 const lldb::ProcessSP &process_sp,
245 lldb::addr_t header_addr)
246 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
247 m_arch("wasm32-unknown-unknown-wasm") {}
248
ParseHeader()249 bool ObjectFileWasm::ParseHeader() {
250 // We already parsed the header during initialization.
251 return true;
252 }
253
GetSymtab()254 Symtab *ObjectFileWasm::GetSymtab() { return nullptr; }
255
GetSectionTypeFromName(llvm::StringRef Name)256 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
257 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
258 return llvm::StringSwitch<SectionType>(Name)
259 .Case("abbrev", eSectionTypeDWARFDebugAbbrev)
260 .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)
261 .Case("addr", eSectionTypeDWARFDebugAddr)
262 .Case("aranges", eSectionTypeDWARFDebugAranges)
263 .Case("cu_index", eSectionTypeDWARFDebugCuIndex)
264 .Case("frame", eSectionTypeDWARFDebugFrame)
265 .Case("info", eSectionTypeDWARFDebugInfo)
266 .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)
267 .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)
268 .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)
269 .Case("loc", eSectionTypeDWARFDebugLoc)
270 .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)
271 .Case("loclists", eSectionTypeDWARFDebugLocLists)
272 .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)
273 .Case("macinfo", eSectionTypeDWARFDebugMacInfo)
274 .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)
275 .Case("names", eSectionTypeDWARFDebugNames)
276 .Case("pubnames", eSectionTypeDWARFDebugPubNames)
277 .Case("pubtypes", eSectionTypeDWARFDebugPubTypes)
278 .Case("ranges", eSectionTypeDWARFDebugRanges)
279 .Case("rnglists", eSectionTypeDWARFDebugRngLists)
280 .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)
281 .Case("str", eSectionTypeDWARFDebugStr)
282 .Case("str.dwo", eSectionTypeDWARFDebugStrDwo)
283 .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)
284 .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)
285 .Case("tu_index", eSectionTypeDWARFDebugTuIndex)
286 .Case("types", eSectionTypeDWARFDebugTypes)
287 .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)
288 .Default(eSectionTypeOther);
289 }
290 return eSectionTypeOther;
291 }
292
CreateSections(SectionList & unified_section_list)293 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
294 if (m_sections_up)
295 return;
296
297 m_sections_up = std::make_unique<SectionList>();
298
299 if (m_sect_infos.empty()) {
300 DecodeSections();
301 }
302
303 for (const section_info §_info : m_sect_infos) {
304 SectionType section_type = eSectionTypeOther;
305 ConstString section_name;
306 offset_t file_offset = sect_info.offset & 0xffffffff;
307 addr_t vm_addr = file_offset;
308 size_t vm_size = sect_info.size;
309
310 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
311 section_type = eSectionTypeCode;
312 section_name = ConstString("code");
313
314 // A code address in DWARF for WebAssembly is the offset of an
315 // instruction relative within the Code section of the WebAssembly file.
316 // For this reason Section::GetFileAddress() must return zero for the
317 // Code section.
318 vm_addr = 0;
319 } else {
320 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
321 if (section_type == eSectionTypeOther)
322 continue;
323 section_name = sect_info.name;
324 if (!IsInMemory()) {
325 vm_size = 0;
326 vm_addr = 0;
327 }
328 }
329
330 SectionSP section_sp(
331 new Section(GetModule(), // Module to which this section belongs.
332 this, // ObjectFile to which this section belongs and
333 // should read section data from.
334 section_type, // Section ID.
335 section_name, // Section name.
336 section_type, // Section type.
337 vm_addr, // VM address.
338 vm_size, // VM size in bytes of this section.
339 file_offset, // Offset of this section in the file.
340 sect_info.size, // Size of the section as found in the file.
341 0, // Alignment of the section
342 0, // Flags for this section.
343 1)); // Number of host bytes per target byte
344 m_sections_up->AddSection(section_sp);
345 unified_section_list.AddSection(section_sp);
346 }
347 }
348
SetLoadAddress(Target & target,lldb::addr_t load_address,bool value_is_offset)349 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
350 bool value_is_offset) {
351 /// In WebAssembly, linear memory is disjointed from code space. The VM can
352 /// load multiple instances of a module, which logically share the same code.
353 /// We represent a wasm32 code address with 64-bits, like:
354 /// 63 32 31 0
355 /// +---------------+---------------+
356 /// + module_id | offset |
357 /// +---------------+---------------+
358 /// where the lower 32 bits represent a module offset (relative to the module
359 /// start not to the beginning of the code section) and the higher 32 bits
360 /// uniquely identify the module in the WebAssembly VM.
361 /// In other words, we assume that each WebAssembly module is loaded by the
362 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
363 /// 0x0000000400000000 for module_id == 4.
364 /// These 64-bit addresses will be used to request code ranges for a specific
365 /// module from the WebAssembly engine.
366
367 assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
368 m_memory_addr == load_address);
369
370 ModuleSP module_sp = GetModule();
371 if (!module_sp)
372 return false;
373
374 DecodeSections();
375
376 size_t num_loaded_sections = 0;
377 SectionList *section_list = GetSectionList();
378 if (!section_list)
379 return false;
380
381 const size_t num_sections = section_list->GetSize();
382 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
383 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
384 if (target.SetSectionLoadAddress(
385 section_sp, load_address | section_sp->GetFileOffset())) {
386 ++num_loaded_sections;
387 }
388 }
389
390 return num_loaded_sections > 0;
391 }
392
ReadImageData(offset_t offset,uint32_t size)393 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
394 DataExtractor data;
395 if (m_file) {
396 if (offset < GetByteSize()) {
397 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
398 auto buffer_sp = MapFileData(m_file, size, offset);
399 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
400 }
401 } else {
402 ProcessSP process_sp(m_process_wp.lock());
403 if (process_sp) {
404 auto data_up = std::make_unique<DataBufferHeap>(size, 0);
405 Status readmem_error;
406 size_t bytes_read = process_sp->ReadMemory(
407 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
408 if (bytes_read > 0) {
409 DataBufferSP buffer_sp(data_up.release());
410 data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
411 }
412 }
413 }
414
415 data.SetByteOrder(GetByteOrder());
416 return data;
417 }
418
GetExternalDebugInfoFileSpec()419 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
420 static ConstString g_sect_name_external_debug_info("external_debug_info");
421
422 for (const section_info §_info : m_sect_infos) {
423 if (g_sect_name_external_debug_info == sect_info.name) {
424 const uint32_t kBufferSize = 1024;
425 DataExtractor section_header_data =
426 ReadImageData(sect_info.offset, kBufferSize);
427 llvm::DataExtractor data = section_header_data.GetAsLLVM();
428 llvm::DataExtractor::Cursor c(0);
429 llvm::Optional<ConstString> symbols_url = GetWasmString(data, c);
430 if (symbols_url)
431 return FileSpec(symbols_url->GetStringRef());
432 }
433 }
434 return llvm::None;
435 }
436
Dump(Stream * s)437 void ObjectFileWasm::Dump(Stream *s) {
438 ModuleSP module_sp(GetModule());
439 if (!module_sp)
440 return;
441
442 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
443
444 llvm::raw_ostream &ostream = s->AsRawOstream();
445 ostream << static_cast<void *>(this) << ": ";
446 s->Indent();
447 ostream << "ObjectFileWasm, file = '";
448 m_file.Dump(ostream);
449 ostream << "', arch = ";
450 ostream << GetArchitecture().GetArchitectureName() << "\n";
451
452 SectionList *sections = GetSectionList();
453 if (sections) {
454 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
455 UINT32_MAX);
456 }
457 ostream << "\n";
458 DumpSectionHeaders(ostream);
459 ostream << "\n";
460 }
461
DumpSectionHeader(llvm::raw_ostream & ostream,const section_info_t & sh)462 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
463 const section_info_t &sh) {
464 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
465 << llvm::format_hex(sh.offset, 10) << " "
466 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
467 << "\n";
468 }
469
DumpSectionHeaders(llvm::raw_ostream & ostream)470 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
471 ostream << "Section Headers\n";
472 ostream << "IDX name addr size id\n";
473 ostream << "==== ---------------- ---------- ---------- ------\n";
474
475 uint32_t idx = 0;
476 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
477 ++pos, ++idx) {
478 ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
479 ObjectFileWasm::DumpSectionHeader(ostream, *pos);
480 }
481 }
482