1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/Log.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/BinaryFormat/Magic.h"
23 #include "llvm/BinaryFormat/Wasm.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Format.h"
26 
27 using namespace lldb;
28 using namespace lldb_private;
29 using namespace lldb_private::wasm;
30 
31 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
32 
33 static const uint32_t kWasmHeaderSize =
34     sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
35 
36 /// Checks whether the data buffer starts with a valid Wasm module header.
ValidateModuleHeader(const DataBufferSP & data_sp)37 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
38   if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
39     return false;
40 
41   if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
42       llvm::file_magic::wasm_object)
43     return false;
44 
45   uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
46 
47   uint32_t version = llvm::support::endian::read32le(Ptr);
48   return version == llvm::wasm::WasmVersion;
49 }
50 
51 static llvm::Optional<ConstString>
GetWasmString(llvm::DataExtractor & data,llvm::DataExtractor::Cursor & c)52 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
53   // A Wasm string is encoded as a vector of UTF-8 codes.
54   // Vectors are encoded with their u32 length followed by the element
55   // sequence.
56   uint64_t len = data.getULEB128(c);
57   if (!c) {
58     consumeError(c.takeError());
59     return llvm::None;
60   }
61 
62   if (len >= (uint64_t(1) << 32)) {
63     return llvm::None;
64   }
65 
66   llvm::SmallVector<uint8_t, 32> str_storage;
67   data.getU8(c, str_storage, len);
68   if (!c) {
69     consumeError(c.takeError());
70     return llvm::None;
71   }
72 
73   llvm::StringRef str = toStringRef(makeArrayRef(str_storage));
74   return ConstString(str);
75 }
76 
77 char ObjectFileWasm::ID;
78 
Initialize()79 void ObjectFileWasm::Initialize() {
80   PluginManager::RegisterPlugin(GetPluginNameStatic(),
81                                 GetPluginDescriptionStatic(), CreateInstance,
82                                 CreateMemoryInstance, GetModuleSpecifications);
83 }
84 
Terminate()85 void ObjectFileWasm::Terminate() {
86   PluginManager::UnregisterPlugin(CreateInstance);
87 }
88 
GetPluginNameStatic()89 ConstString ObjectFileWasm::GetPluginNameStatic() {
90   static ConstString g_name("wasm");
91   return g_name;
92 }
93 
94 ObjectFile *
CreateInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t file_offset,offset_t length)95 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp,
96                                offset_t data_offset, const FileSpec *file,
97                                offset_t file_offset, offset_t length) {
98   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
99 
100   if (!data_sp) {
101     data_sp = MapFileData(*file, length, file_offset);
102     if (!data_sp) {
103       LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
104                 file->GetPath().c_str());
105       return nullptr;
106     }
107     data_offset = 0;
108   }
109 
110   assert(data_sp);
111   if (!ValidateModuleHeader(data_sp)) {
112     LLDB_LOGF(log,
113               "Failed to create ObjectFileWasm instance: invalid Wasm header");
114     return nullptr;
115   }
116 
117   // Update the data to contain the entire file if it doesn't contain it
118   // already.
119   if (data_sp->GetByteSize() < length) {
120     data_sp = MapFileData(*file, length, file_offset);
121     if (!data_sp) {
122       LLDB_LOGF(log,
123                 "Failed to create ObjectFileWasm instance: cannot read file %s",
124                 file->GetPath().c_str());
125       return nullptr;
126     }
127     data_offset = 0;
128   }
129 
130   std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
131       module_sp, data_sp, data_offset, file, file_offset, length));
132   ArchSpec spec = objfile_up->GetArchitecture();
133   if (spec && objfile_up->SetModulesArchitecture(spec)) {
134     LLDB_LOGF(log,
135               "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
136               static_cast<void *>(objfile_up.get()),
137               static_cast<void *>(objfile_up->GetModule().get()),
138               objfile_up->GetModule()->GetSpecificationDescription().c_str(),
139               file ? file->GetPath().c_str() : "<NULL>");
140     return objfile_up.release();
141   }
142 
143   LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
144   return nullptr;
145 }
146 
CreateMemoryInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,const ProcessSP & process_sp,addr_t header_addr)147 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
148                                                  DataBufferSP &data_sp,
149                                                  const ProcessSP &process_sp,
150                                                  addr_t header_addr) {
151   if (!ValidateModuleHeader(data_sp))
152     return nullptr;
153 
154   std::unique_ptr<ObjectFileWasm> objfile_up(
155       new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
156   ArchSpec spec = objfile_up->GetArchitecture();
157   if (spec && objfile_up->SetModulesArchitecture(spec))
158     return objfile_up.release();
159   return nullptr;
160 }
161 
DecodeNextSection(lldb::offset_t * offset_ptr)162 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
163   // Buffer sufficient to read a section header and find the pointer to the next
164   // section.
165   const uint32_t kBufferSize = 1024;
166   DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
167 
168   llvm::DataExtractor data = section_header_data.GetAsLLVM();
169   llvm::DataExtractor::Cursor c(0);
170 
171   // Each section consists of:
172   // - a one-byte section id,
173   // - the u32 size of the contents, in bytes,
174   // - the actual contents.
175   uint8_t section_id = data.getU8(c);
176   uint64_t payload_len = data.getULEB128(c);
177   if (!c)
178     return !llvm::errorToBool(c.takeError());
179 
180   if (payload_len >= (uint64_t(1) << 32))
181     return false;
182 
183   if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
184     // Custom sections have the id 0. Their contents consist of a name
185     // identifying the custom section, followed by an uninterpreted sequence
186     // of bytes.
187     lldb::offset_t prev_offset = c.tell();
188     llvm::Optional<ConstString> sect_name = GetWasmString(data, c);
189     if (!sect_name)
190       return false;
191 
192     if (payload_len < c.tell() - prev_offset)
193       return false;
194 
195     uint32_t section_length = payload_len - (c.tell() - prev_offset);
196     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
197                                         section_id, *sect_name});
198     *offset_ptr += (c.tell() + section_length);
199   } else if (section_id <= llvm::wasm::WASM_SEC_EVENT) {
200     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
201                                         static_cast<uint32_t>(payload_len),
202                                         section_id, ConstString()});
203     *offset_ptr += (c.tell() + payload_len);
204   } else {
205     // Invalid section id.
206     return false;
207   }
208   return true;
209 }
210 
DecodeSections()211 bool ObjectFileWasm::DecodeSections() {
212   lldb::offset_t offset = kWasmHeaderSize;
213   if (IsInMemory()) {
214     offset += m_memory_addr;
215   }
216 
217   while (DecodeNextSection(&offset))
218     ;
219   return true;
220 }
221 
GetModuleSpecifications(const FileSpec & file,DataBufferSP & data_sp,offset_t data_offset,offset_t file_offset,offset_t length,ModuleSpecList & specs)222 size_t ObjectFileWasm::GetModuleSpecifications(
223     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
224     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
225   if (!ValidateModuleHeader(data_sp)) {
226     return 0;
227   }
228 
229   ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
230   specs.Append(spec);
231   return 1;
232 }
233 
ObjectFileWasm(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t offset,offset_t length)234 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp,
235                                offset_t data_offset, const FileSpec *file,
236                                offset_t offset, offset_t length)
237     : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
238       m_arch("wasm32-unknown-unknown-wasm") {
239   m_data.SetAddressByteSize(4);
240 }
241 
ObjectFileWasm(const lldb::ModuleSP & module_sp,lldb::DataBufferSP & header_data_sp,const lldb::ProcessSP & process_sp,lldb::addr_t header_addr)242 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
243                                lldb::DataBufferSP &header_data_sp,
244                                const lldb::ProcessSP &process_sp,
245                                lldb::addr_t header_addr)
246     : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
247       m_arch("wasm32-unknown-unknown-wasm") {}
248 
ParseHeader()249 bool ObjectFileWasm::ParseHeader() {
250   // We already parsed the header during initialization.
251   return true;
252 }
253 
GetSymtab()254 Symtab *ObjectFileWasm::GetSymtab() { return nullptr; }
255 
CreateSections(SectionList & unified_section_list)256 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
257   if (m_sections_up)
258     return;
259 
260   m_sections_up = std::make_unique<SectionList>();
261 
262   if (m_sect_infos.empty()) {
263     DecodeSections();
264   }
265 
266   for (const section_info &sect_info : m_sect_infos) {
267     SectionType section_type = eSectionTypeOther;
268     ConstString section_name;
269     offset_t file_offset = sect_info.offset & 0xffffffff;
270     addr_t vm_addr = file_offset;
271     size_t vm_size = sect_info.size;
272 
273     if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
274       section_type = eSectionTypeCode;
275       section_name = ConstString("code");
276 
277       // A code address in DWARF for WebAssembly is the offset of an
278       // instruction relative within the Code section of the WebAssembly file.
279       // For this reason Section::GetFileAddress() must return zero for the
280       // Code section.
281       vm_addr = 0;
282     } else {
283       section_type =
284           llvm::StringSwitch<SectionType>(sect_info.name.GetStringRef())
285               .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev)
286               .Case(".debug_addr", eSectionTypeDWARFDebugAddr)
287               .Case(".debug_aranges", eSectionTypeDWARFDebugAranges)
288               .Case(".debug_cu_index", eSectionTypeDWARFDebugCuIndex)
289               .Case(".debug_frame", eSectionTypeDWARFDebugFrame)
290               .Case(".debug_info", eSectionTypeDWARFDebugInfo)
291               .Case(".debug_line", eSectionTypeDWARFDebugLine)
292               .Case(".debug_line_str", eSectionTypeDWARFDebugLineStr)
293               .Case(".debug_loc", eSectionTypeDWARFDebugLoc)
294               .Case(".debug_loclists", eSectionTypeDWARFDebugLocLists)
295               .Case(".debug_macinfo", eSectionTypeDWARFDebugMacInfo)
296               .Case(".debug_macro", eSectionTypeDWARFDebugMacro)
297               .Case(".debug_names", eSectionTypeDWARFDebugNames)
298               .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames)
299               .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes)
300               .Case(".debug_ranges", eSectionTypeDWARFDebugRanges)
301               .Case(".debug_rnglists", eSectionTypeDWARFDebugRngLists)
302               .Case(".debug_str", eSectionTypeDWARFDebugStr)
303               .Case(".debug_str_offsets", eSectionTypeDWARFDebugStrOffsets)
304               .Case(".debug_types", eSectionTypeDWARFDebugTypes)
305               .Default(eSectionTypeOther);
306       if (section_type == eSectionTypeOther)
307         continue;
308       section_name = sect_info.name;
309       if (!IsInMemory()) {
310         vm_size = 0;
311         vm_addr = 0;
312       }
313     }
314 
315     SectionSP section_sp(
316         new Section(GetModule(), // Module to which this section belongs.
317                     this,        // ObjectFile to which this section belongs and
318                                  // should read section data from.
319                     section_type,   // Section ID.
320                     section_name,   // Section name.
321                     section_type,   // Section type.
322                     vm_addr,        // VM address.
323                     vm_size,        // VM size in bytes of this section.
324                     file_offset,    // Offset of this section in the file.
325                     sect_info.size, // Size of the section as found in the file.
326                     0,              // Alignment of the section
327                     0,              // Flags for this section.
328                     1));            // Number of host bytes per target byte
329     m_sections_up->AddSection(section_sp);
330     unified_section_list.AddSection(section_sp);
331   }
332 }
333 
SetLoadAddress(Target & target,lldb::addr_t load_address,bool value_is_offset)334 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
335                                     bool value_is_offset) {
336   /// In WebAssembly, linear memory is disjointed from code space. The VM can
337   /// load multiple instances of a module, which logically share the same code.
338   /// We represent a wasm32 code address with 64-bits, like:
339   /// 63            32 31             0
340   /// +---------------+---------------+
341   /// +   module_id   |     offset    |
342   /// +---------------+---------------+
343   /// where the lower 32 bits represent a module offset (relative to the module
344   /// start not to the beginning of the code section) and the higher 32 bits
345   /// uniquely identify the module in the WebAssembly VM.
346   /// In other words, we assume that each WebAssembly module is loaded by the
347   /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
348   /// 0x0000000400000000 for module_id == 4.
349   /// These 64-bit addresses will be used to request code ranges for a specific
350   /// module from the WebAssembly engine.
351 
352   assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
353          m_memory_addr == load_address);
354 
355   ModuleSP module_sp = GetModule();
356   if (!module_sp)
357     return false;
358 
359   DecodeSections();
360 
361   size_t num_loaded_sections = 0;
362   SectionList *section_list = GetSectionList();
363   if (!section_list)
364     return false;
365 
366   const size_t num_sections = section_list->GetSize();
367   for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
368     SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
369     if (target.SetSectionLoadAddress(
370             section_sp, load_address | section_sp->GetFileOffset())) {
371       ++num_loaded_sections;
372     }
373   }
374 
375   return num_loaded_sections > 0;
376 }
377 
ReadImageData(offset_t offset,uint32_t size)378 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
379   DataExtractor data;
380   if (m_file) {
381     if (offset < GetByteSize()) {
382       size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
383       auto buffer_sp = MapFileData(m_file, size, offset);
384       return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
385     }
386   } else {
387     ProcessSP process_sp(m_process_wp.lock());
388     if (process_sp) {
389       auto data_up = std::make_unique<DataBufferHeap>(size, 0);
390       Status readmem_error;
391       size_t bytes_read = process_sp->ReadMemory(
392           offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
393       if (bytes_read > 0) {
394         DataBufferSP buffer_sp(data_up.release());
395         data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
396       }
397     }
398   }
399 
400   data.SetByteOrder(GetByteOrder());
401   return data;
402 }
403 
GetExternalDebugInfoFileSpec()404 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
405   static ConstString g_sect_name_external_debug_info("external_debug_info");
406 
407   for (const section_info &sect_info : m_sect_infos) {
408     if (g_sect_name_external_debug_info == sect_info.name) {
409       const uint32_t kBufferSize = 1024;
410       DataExtractor section_header_data =
411           ReadImageData(sect_info.offset, kBufferSize);
412       llvm::DataExtractor data = section_header_data.GetAsLLVM();
413       llvm::DataExtractor::Cursor c(0);
414       llvm::Optional<ConstString> symbols_url = GetWasmString(data, c);
415       if (symbols_url)
416         return FileSpec(symbols_url->GetStringRef());
417     }
418   }
419   return llvm::None;
420 }
421 
Dump(Stream * s)422 void ObjectFileWasm::Dump(Stream *s) {
423   ModuleSP module_sp(GetModule());
424   if (!module_sp)
425     return;
426 
427   std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
428 
429   llvm::raw_ostream &ostream = s->AsRawOstream();
430   ostream << static_cast<void *>(this) << ": ";
431   s->Indent();
432   ostream << "ObjectFileWasm, file = '";
433   m_file.Dump(ostream);
434   ostream << "', arch = ";
435   ostream << GetArchitecture().GetArchitectureName() << "\n";
436 
437   SectionList *sections = GetSectionList();
438   if (sections) {
439     sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
440                    UINT32_MAX);
441   }
442   ostream << "\n";
443   DumpSectionHeaders(ostream);
444   ostream << "\n";
445 }
446 
DumpSectionHeader(llvm::raw_ostream & ostream,const section_info_t & sh)447 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
448                                        const section_info_t &sh) {
449   ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
450           << llvm::format_hex(sh.offset, 10) << " "
451           << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
452           << "\n";
453 }
454 
DumpSectionHeaders(llvm::raw_ostream & ostream)455 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
456   ostream << "Section Headers\n";
457   ostream << "IDX  name             addr       size       id\n";
458   ostream << "==== ---------------- ---------- ---------- ------\n";
459 
460   uint32_t idx = 0;
461   for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
462        ++pos, ++idx) {
463     ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
464     ObjectFileWasm::DumpSectionHeader(ostream, *pos);
465   }
466 }
467