1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/Log.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/BinaryFormat/Magic.h"
23 #include "llvm/BinaryFormat/Wasm.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Format.h"
26 
27 using namespace lldb;
28 using namespace lldb_private;
29 using namespace lldb_private::wasm;
30 
31 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
32 
33 static const uint32_t kWasmHeaderSize =
34     sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
35 
36 /// Checks whether the data buffer starts with a valid Wasm module header.
ValidateModuleHeader(const DataBufferSP & data_sp)37 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
38   if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
39     return false;
40 
41   if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
42       llvm::file_magic::wasm_object)
43     return false;
44 
45   uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
46 
47   uint32_t version = llvm::support::endian::read32le(Ptr);
48   return version == llvm::wasm::WasmVersion;
49 }
50 
51 static llvm::Optional<ConstString>
GetWasmString(llvm::DataExtractor & data,llvm::DataExtractor::Cursor & c)52 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
53   // A Wasm string is encoded as a vector of UTF-8 codes.
54   // Vectors are encoded with their u32 length followed by the element
55   // sequence.
56   uint64_t len = data.getULEB128(c);
57   if (!c) {
58     consumeError(c.takeError());
59     return llvm::None;
60   }
61 
62   if (len >= (uint64_t(1) << 32)) {
63     return llvm::None;
64   }
65 
66   llvm::SmallVector<uint8_t, 32> str_storage;
67   data.getU8(c, str_storage, len);
68   if (!c) {
69     consumeError(c.takeError());
70     return llvm::None;
71   }
72 
73   llvm::StringRef str = toStringRef(makeArrayRef(str_storage));
74   return ConstString(str);
75 }
76 
77 char ObjectFileWasm::ID;
78 
Initialize()79 void ObjectFileWasm::Initialize() {
80   PluginManager::RegisterPlugin(GetPluginNameStatic(),
81                                 GetPluginDescriptionStatic(), CreateInstance,
82                                 CreateMemoryInstance, GetModuleSpecifications);
83 }
84 
Terminate()85 void ObjectFileWasm::Terminate() {
86   PluginManager::UnregisterPlugin(CreateInstance);
87 }
88 
GetPluginNameStatic()89 ConstString ObjectFileWasm::GetPluginNameStatic() {
90   static ConstString g_name("wasm");
91   return g_name;
92 }
93 
94 ObjectFile *
CreateInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t file_offset,offset_t length)95 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp,
96                                offset_t data_offset, const FileSpec *file,
97                                offset_t file_offset, offset_t length) {
98   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
99 
100   if (!data_sp) {
101     data_sp = MapFileData(*file, length, file_offset);
102     if (!data_sp) {
103       LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
104                 file->GetPath().c_str());
105       return nullptr;
106     }
107     data_offset = 0;
108   }
109 
110   assert(data_sp);
111   if (!ValidateModuleHeader(data_sp)) {
112     LLDB_LOGF(log,
113               "Failed to create ObjectFileWasm instance: invalid Wasm header");
114     return nullptr;
115   }
116 
117   // Update the data to contain the entire file if it doesn't contain it
118   // already.
119   if (data_sp->GetByteSize() < length) {
120     data_sp = MapFileData(*file, length, file_offset);
121     if (!data_sp) {
122       LLDB_LOGF(log,
123                 "Failed to create ObjectFileWasm instance: cannot read file %s",
124                 file->GetPath().c_str());
125       return nullptr;
126     }
127     data_offset = 0;
128   }
129 
130   std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
131       module_sp, data_sp, data_offset, file, file_offset, length));
132   ArchSpec spec = objfile_up->GetArchitecture();
133   if (spec && objfile_up->SetModulesArchitecture(spec)) {
134     LLDB_LOGF(log,
135               "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
136               static_cast<void *>(objfile_up.get()),
137               static_cast<void *>(objfile_up->GetModule().get()),
138               objfile_up->GetModule()->GetSpecificationDescription().c_str(),
139               file ? file->GetPath().c_str() : "<NULL>");
140     return objfile_up.release();
141   }
142 
143   LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
144   return nullptr;
145 }
146 
CreateMemoryInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,const ProcessSP & process_sp,addr_t header_addr)147 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
148                                                  DataBufferSP &data_sp,
149                                                  const ProcessSP &process_sp,
150                                                  addr_t header_addr) {
151   if (!ValidateModuleHeader(data_sp))
152     return nullptr;
153 
154   std::unique_ptr<ObjectFileWasm> objfile_up(
155       new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
156   ArchSpec spec = objfile_up->GetArchitecture();
157   if (spec && objfile_up->SetModulesArchitecture(spec))
158     return objfile_up.release();
159   return nullptr;
160 }
161 
DecodeNextSection(lldb::offset_t * offset_ptr)162 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
163   // Buffer sufficient to read a section header and find the pointer to the next
164   // section.
165   const uint32_t kBufferSize = 1024;
166   DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
167 
168   llvm::DataExtractor data = section_header_data.GetAsLLVM();
169   llvm::DataExtractor::Cursor c(0);
170 
171   // Each section consists of:
172   // - a one-byte section id,
173   // - the u32 size of the contents, in bytes,
174   // - the actual contents.
175   uint8_t section_id = data.getU8(c);
176   uint64_t payload_len = data.getULEB128(c);
177   if (!c)
178     return !llvm::errorToBool(c.takeError());
179 
180   if (payload_len >= (uint64_t(1) << 32))
181     return false;
182 
183   if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
184     // Custom sections have the id 0. Their contents consist of a name
185     // identifying the custom section, followed by an uninterpreted sequence
186     // of bytes.
187     lldb::offset_t prev_offset = c.tell();
188     llvm::Optional<ConstString> sect_name = GetWasmString(data, c);
189     if (!sect_name)
190       return false;
191 
192     if (payload_len < c.tell() - prev_offset)
193       return false;
194 
195     uint32_t section_length = payload_len - (c.tell() - prev_offset);
196     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
197                                         section_id, *sect_name});
198     *offset_ptr += (c.tell() + section_length);
199   } else if (section_id <= llvm::wasm::WASM_SEC_TAG) {
200     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
201                                         static_cast<uint32_t>(payload_len),
202                                         section_id, ConstString()});
203     *offset_ptr += (c.tell() + payload_len);
204   } else {
205     // Invalid section id.
206     return false;
207   }
208   return true;
209 }
210 
DecodeSections()211 bool ObjectFileWasm::DecodeSections() {
212   lldb::offset_t offset = kWasmHeaderSize;
213   if (IsInMemory()) {
214     offset += m_memory_addr;
215   }
216 
217   while (DecodeNextSection(&offset))
218     ;
219   return true;
220 }
221 
GetModuleSpecifications(const FileSpec & file,DataBufferSP & data_sp,offset_t data_offset,offset_t file_offset,offset_t length,ModuleSpecList & specs)222 size_t ObjectFileWasm::GetModuleSpecifications(
223     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
224     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
225   if (!ValidateModuleHeader(data_sp)) {
226     return 0;
227   }
228 
229   ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
230   specs.Append(spec);
231   return 1;
232 }
233 
ObjectFileWasm(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t offset,offset_t length)234 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp,
235                                offset_t data_offset, const FileSpec *file,
236                                offset_t offset, offset_t length)
237     : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
238       m_arch("wasm32-unknown-unknown-wasm") {
239   m_data.SetAddressByteSize(4);
240 }
241 
ObjectFileWasm(const lldb::ModuleSP & module_sp,lldb::DataBufferSP & header_data_sp,const lldb::ProcessSP & process_sp,lldb::addr_t header_addr)242 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
243                                lldb::DataBufferSP &header_data_sp,
244                                const lldb::ProcessSP &process_sp,
245                                lldb::addr_t header_addr)
246     : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
247       m_arch("wasm32-unknown-unknown-wasm") {}
248 
ParseHeader()249 bool ObjectFileWasm::ParseHeader() {
250   // We already parsed the header during initialization.
251   return true;
252 }
253 
GetSymtab()254 Symtab *ObjectFileWasm::GetSymtab() { return nullptr; }
255 
GetSectionTypeFromName(llvm::StringRef Name)256 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
257   if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
258     return llvm::StringSwitch<SectionType>(Name)
259         .Case("abbrev", eSectionTypeDWARFDebugAbbrev)
260         .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)
261         .Case("addr", eSectionTypeDWARFDebugAddr)
262         .Case("aranges", eSectionTypeDWARFDebugAranges)
263         .Case("cu_index", eSectionTypeDWARFDebugCuIndex)
264         .Case("frame", eSectionTypeDWARFDebugFrame)
265         .Case("info", eSectionTypeDWARFDebugInfo)
266         .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)
267         .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)
268         .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)
269         .Case("loc", eSectionTypeDWARFDebugLoc)
270         .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)
271         .Case("loclists", eSectionTypeDWARFDebugLocLists)
272         .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)
273         .Case("macinfo", eSectionTypeDWARFDebugMacInfo)
274         .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)
275         .Case("names", eSectionTypeDWARFDebugNames)
276         .Case("pubnames", eSectionTypeDWARFDebugPubNames)
277         .Case("pubtypes", eSectionTypeDWARFDebugPubTypes)
278         .Case("ranges", eSectionTypeDWARFDebugRanges)
279         .Case("rnglists", eSectionTypeDWARFDebugRngLists)
280         .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)
281         .Case("str", eSectionTypeDWARFDebugStr)
282         .Case("str.dwo", eSectionTypeDWARFDebugStrDwo)
283         .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)
284         .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)
285         .Case("tu_index", eSectionTypeDWARFDebugTuIndex)
286         .Case("types", eSectionTypeDWARFDebugTypes)
287         .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)
288         .Default(eSectionTypeOther);
289   }
290   return eSectionTypeOther;
291 }
292 
CreateSections(SectionList & unified_section_list)293 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
294   if (m_sections_up)
295     return;
296 
297   m_sections_up = std::make_unique<SectionList>();
298 
299   if (m_sect_infos.empty()) {
300     DecodeSections();
301   }
302 
303   for (const section_info &sect_info : m_sect_infos) {
304     SectionType section_type = eSectionTypeOther;
305     ConstString section_name;
306     offset_t file_offset = sect_info.offset & 0xffffffff;
307     addr_t vm_addr = file_offset;
308     size_t vm_size = sect_info.size;
309 
310     if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
311       section_type = eSectionTypeCode;
312       section_name = ConstString("code");
313 
314       // A code address in DWARF for WebAssembly is the offset of an
315       // instruction relative within the Code section of the WebAssembly file.
316       // For this reason Section::GetFileAddress() must return zero for the
317       // Code section.
318       vm_addr = 0;
319     } else {
320       section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
321       if (section_type == eSectionTypeOther)
322         continue;
323       section_name = sect_info.name;
324       if (!IsInMemory()) {
325         vm_size = 0;
326         vm_addr = 0;
327       }
328     }
329 
330     SectionSP section_sp(
331         new Section(GetModule(), // Module to which this section belongs.
332                     this,        // ObjectFile to which this section belongs and
333                                  // should read section data from.
334                     section_type,   // Section ID.
335                     section_name,   // Section name.
336                     section_type,   // Section type.
337                     vm_addr,        // VM address.
338                     vm_size,        // VM size in bytes of this section.
339                     file_offset,    // Offset of this section in the file.
340                     sect_info.size, // Size of the section as found in the file.
341                     0,              // Alignment of the section
342                     0,              // Flags for this section.
343                     1));            // Number of host bytes per target byte
344     m_sections_up->AddSection(section_sp);
345     unified_section_list.AddSection(section_sp);
346   }
347 }
348 
SetLoadAddress(Target & target,lldb::addr_t load_address,bool value_is_offset)349 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
350                                     bool value_is_offset) {
351   /// In WebAssembly, linear memory is disjointed from code space. The VM can
352   /// load multiple instances of a module, which logically share the same code.
353   /// We represent a wasm32 code address with 64-bits, like:
354   /// 63            32 31             0
355   /// +---------------+---------------+
356   /// +   module_id   |     offset    |
357   /// +---------------+---------------+
358   /// where the lower 32 bits represent a module offset (relative to the module
359   /// start not to the beginning of the code section) and the higher 32 bits
360   /// uniquely identify the module in the WebAssembly VM.
361   /// In other words, we assume that each WebAssembly module is loaded by the
362   /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
363   /// 0x0000000400000000 for module_id == 4.
364   /// These 64-bit addresses will be used to request code ranges for a specific
365   /// module from the WebAssembly engine.
366 
367   assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
368          m_memory_addr == load_address);
369 
370   ModuleSP module_sp = GetModule();
371   if (!module_sp)
372     return false;
373 
374   DecodeSections();
375 
376   size_t num_loaded_sections = 0;
377   SectionList *section_list = GetSectionList();
378   if (!section_list)
379     return false;
380 
381   const size_t num_sections = section_list->GetSize();
382   for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
383     SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
384     if (target.SetSectionLoadAddress(
385             section_sp, load_address | section_sp->GetFileOffset())) {
386       ++num_loaded_sections;
387     }
388   }
389 
390   return num_loaded_sections > 0;
391 }
392 
ReadImageData(offset_t offset,uint32_t size)393 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
394   DataExtractor data;
395   if (m_file) {
396     if (offset < GetByteSize()) {
397       size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
398       auto buffer_sp = MapFileData(m_file, size, offset);
399       return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
400     }
401   } else {
402     ProcessSP process_sp(m_process_wp.lock());
403     if (process_sp) {
404       auto data_up = std::make_unique<DataBufferHeap>(size, 0);
405       Status readmem_error;
406       size_t bytes_read = process_sp->ReadMemory(
407           offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
408       if (bytes_read > 0) {
409         DataBufferSP buffer_sp(data_up.release());
410         data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
411       }
412     }
413   }
414 
415   data.SetByteOrder(GetByteOrder());
416   return data;
417 }
418 
GetExternalDebugInfoFileSpec()419 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
420   static ConstString g_sect_name_external_debug_info("external_debug_info");
421 
422   for (const section_info &sect_info : m_sect_infos) {
423     if (g_sect_name_external_debug_info == sect_info.name) {
424       const uint32_t kBufferSize = 1024;
425       DataExtractor section_header_data =
426           ReadImageData(sect_info.offset, kBufferSize);
427       llvm::DataExtractor data = section_header_data.GetAsLLVM();
428       llvm::DataExtractor::Cursor c(0);
429       llvm::Optional<ConstString> symbols_url = GetWasmString(data, c);
430       if (symbols_url)
431         return FileSpec(symbols_url->GetStringRef());
432     }
433   }
434   return llvm::None;
435 }
436 
Dump(Stream * s)437 void ObjectFileWasm::Dump(Stream *s) {
438   ModuleSP module_sp(GetModule());
439   if (!module_sp)
440     return;
441 
442   std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
443 
444   llvm::raw_ostream &ostream = s->AsRawOstream();
445   ostream << static_cast<void *>(this) << ": ";
446   s->Indent();
447   ostream << "ObjectFileWasm, file = '";
448   m_file.Dump(ostream);
449   ostream << "', arch = ";
450   ostream << GetArchitecture().GetArchitectureName() << "\n";
451 
452   SectionList *sections = GetSectionList();
453   if (sections) {
454     sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
455                    UINT32_MAX);
456   }
457   ostream << "\n";
458   DumpSectionHeaders(ostream);
459   ostream << "\n";
460 }
461 
DumpSectionHeader(llvm::raw_ostream & ostream,const section_info_t & sh)462 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
463                                        const section_info_t &sh) {
464   ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
465           << llvm::format_hex(sh.offset, 10) << " "
466           << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
467           << "\n";
468 }
469 
DumpSectionHeaders(llvm::raw_ostream & ostream)470 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
471   ostream << "Section Headers\n";
472   ostream << "IDX  name             addr       size       id\n";
473   ostream << "==== ---------------- ---------- ---------- ------\n";
474 
475   uint32_t idx = 0;
476   for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
477        ++pos, ++idx) {
478     ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
479     ObjectFileWasm::DumpSectionHeader(ostream, *pos);
480   }
481 }
482