1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/LLDBLog.h"
19 #include "lldb/Utility/Log.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/BinaryFormat/Magic.h"
24 #include "llvm/BinaryFormat/Wasm.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Format.h"
27 #include <optional>
28 
29 using namespace lldb;
30 using namespace lldb_private;
31 using namespace lldb_private::wasm;
32 
33 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
34 
35 static const uint32_t kWasmHeaderSize =
36     sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
37 
38 /// Checks whether the data buffer starts with a valid Wasm module header.
39 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
40   if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
41     return false;
42 
43   if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
44       llvm::file_magic::wasm_object)
45     return false;
46 
47   const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
48 
49   uint32_t version = llvm::support::endian::read32le(Ptr);
50   return version == llvm::wasm::WasmVersion;
51 }
52 
53 static std::optional<ConstString>
54 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
55   // A Wasm string is encoded as a vector of UTF-8 codes.
56   // Vectors are encoded with their u32 length followed by the element
57   // sequence.
58   uint64_t len = data.getULEB128(c);
59   if (!c) {
60     consumeError(c.takeError());
61     return std::nullopt;
62   }
63 
64   if (len >= (uint64_t(1) << 32)) {
65     return std::nullopt;
66   }
67 
68   llvm::SmallVector<uint8_t, 32> str_storage;
69   data.getU8(c, str_storage, len);
70   if (!c) {
71     consumeError(c.takeError());
72     return std::nullopt;
73   }
74 
75   llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
76   return ConstString(str);
77 }
78 
79 char ObjectFileWasm::ID;
80 
81 void ObjectFileWasm::Initialize() {
82   PluginManager::RegisterPlugin(GetPluginNameStatic(),
83                                 GetPluginDescriptionStatic(), CreateInstance,
84                                 CreateMemoryInstance, GetModuleSpecifications);
85 }
86 
87 void ObjectFileWasm::Terminate() {
88   PluginManager::UnregisterPlugin(CreateInstance);
89 }
90 
91 ObjectFile *
92 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp,
93                                offset_t data_offset, const FileSpec *file,
94                                offset_t file_offset, offset_t length) {
95   Log *log = GetLog(LLDBLog::Object);
96 
97   if (!data_sp) {
98     data_sp = MapFileData(*file, length, file_offset);
99     if (!data_sp) {
100       LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
101                 file->GetPath().c_str());
102       return nullptr;
103     }
104     data_offset = 0;
105   }
106 
107   assert(data_sp);
108   if (!ValidateModuleHeader(data_sp)) {
109     LLDB_LOGF(log,
110               "Failed to create ObjectFileWasm instance: invalid Wasm header");
111     return nullptr;
112   }
113 
114   // Update the data to contain the entire file if it doesn't contain it
115   // already.
116   if (data_sp->GetByteSize() < length) {
117     data_sp = MapFileData(*file, length, file_offset);
118     if (!data_sp) {
119       LLDB_LOGF(log,
120                 "Failed to create ObjectFileWasm instance: cannot read file %s",
121                 file->GetPath().c_str());
122       return nullptr;
123     }
124     data_offset = 0;
125   }
126 
127   std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
128       module_sp, data_sp, data_offset, file, file_offset, length));
129   ArchSpec spec = objfile_up->GetArchitecture();
130   if (spec && objfile_up->SetModulesArchitecture(spec)) {
131     LLDB_LOGF(log,
132               "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
133               static_cast<void *>(objfile_up.get()),
134               static_cast<void *>(objfile_up->GetModule().get()),
135               objfile_up->GetModule()->GetSpecificationDescription().c_str(),
136               file ? file->GetPath().c_str() : "<NULL>");
137     return objfile_up.release();
138   }
139 
140   LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
141   return nullptr;
142 }
143 
144 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
145                                                  WritableDataBufferSP data_sp,
146                                                  const ProcessSP &process_sp,
147                                                  addr_t header_addr) {
148   if (!ValidateModuleHeader(data_sp))
149     return nullptr;
150 
151   std::unique_ptr<ObjectFileWasm> objfile_up(
152       new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
153   ArchSpec spec = objfile_up->GetArchitecture();
154   if (spec && objfile_up->SetModulesArchitecture(spec))
155     return objfile_up.release();
156   return nullptr;
157 }
158 
159 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
160   // Buffer sufficient to read a section header and find the pointer to the next
161   // section.
162   const uint32_t kBufferSize = 1024;
163   DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
164 
165   llvm::DataExtractor data = section_header_data.GetAsLLVM();
166   llvm::DataExtractor::Cursor c(0);
167 
168   // Each section consists of:
169   // - a one-byte section id,
170   // - the u32 size of the contents, in bytes,
171   // - the actual contents.
172   uint8_t section_id = data.getU8(c);
173   uint64_t payload_len = data.getULEB128(c);
174   if (!c)
175     return !llvm::errorToBool(c.takeError());
176 
177   if (payload_len >= (uint64_t(1) << 32))
178     return false;
179 
180   if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
181     // Custom sections have the id 0. Their contents consist of a name
182     // identifying the custom section, followed by an uninterpreted sequence
183     // of bytes.
184     lldb::offset_t prev_offset = c.tell();
185     std::optional<ConstString> sect_name = GetWasmString(data, c);
186     if (!sect_name)
187       return false;
188 
189     if (payload_len < c.tell() - prev_offset)
190       return false;
191 
192     uint32_t section_length = payload_len - (c.tell() - prev_offset);
193     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
194                                         section_id, *sect_name});
195     *offset_ptr += (c.tell() + section_length);
196   } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
198                                         static_cast<uint32_t>(payload_len),
199                                         section_id, ConstString()});
200     *offset_ptr += (c.tell() + payload_len);
201   } else {
202     // Invalid section id.
203     return false;
204   }
205   return true;
206 }
207 
208 bool ObjectFileWasm::DecodeSections() {
209   lldb::offset_t offset = kWasmHeaderSize;
210   if (IsInMemory()) {
211     offset += m_memory_addr;
212   }
213 
214   while (DecodeNextSection(&offset))
215     ;
216   return true;
217 }
218 
219 size_t ObjectFileWasm::GetModuleSpecifications(
220     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
221     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
222   if (!ValidateModuleHeader(data_sp)) {
223     return 0;
224   }
225 
226   ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
227   specs.Append(spec);
228   return 1;
229 }
230 
231 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp,
232                                offset_t data_offset, const FileSpec *file,
233                                offset_t offset, offset_t length)
234     : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
235       m_arch("wasm32-unknown-unknown-wasm") {
236   m_data.SetAddressByteSize(4);
237 }
238 
239 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
240                                lldb::WritableDataBufferSP header_data_sp,
241                                const lldb::ProcessSP &process_sp,
242                                lldb::addr_t header_addr)
243     : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
244       m_arch("wasm32-unknown-unknown-wasm") {}
245 
246 bool ObjectFileWasm::ParseHeader() {
247   // We already parsed the header during initialization.
248   return true;
249 }
250 
251 void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
252 
253 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
254   if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
255     return llvm::StringSwitch<SectionType>(Name)
256         .Case("abbrev", eSectionTypeDWARFDebugAbbrev)
257         .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)
258         .Case("addr", eSectionTypeDWARFDebugAddr)
259         .Case("aranges", eSectionTypeDWARFDebugAranges)
260         .Case("cu_index", eSectionTypeDWARFDebugCuIndex)
261         .Case("frame", eSectionTypeDWARFDebugFrame)
262         .Case("info", eSectionTypeDWARFDebugInfo)
263         .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)
264         .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)
265         .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)
266         .Case("loc", eSectionTypeDWARFDebugLoc)
267         .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)
268         .Case("loclists", eSectionTypeDWARFDebugLocLists)
269         .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)
270         .Case("macinfo", eSectionTypeDWARFDebugMacInfo)
271         .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)
272         .Case("names", eSectionTypeDWARFDebugNames)
273         .Case("pubnames", eSectionTypeDWARFDebugPubNames)
274         .Case("pubtypes", eSectionTypeDWARFDebugPubTypes)
275         .Case("ranges", eSectionTypeDWARFDebugRanges)
276         .Case("rnglists", eSectionTypeDWARFDebugRngLists)
277         .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)
278         .Case("str", eSectionTypeDWARFDebugStr)
279         .Case("str.dwo", eSectionTypeDWARFDebugStrDwo)
280         .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)
281         .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)
282         .Case("tu_index", eSectionTypeDWARFDebugTuIndex)
283         .Case("types", eSectionTypeDWARFDebugTypes)
284         .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)
285         .Default(eSectionTypeOther);
286   }
287   return eSectionTypeOther;
288 }
289 
290 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
291   if (m_sections_up)
292     return;
293 
294   m_sections_up = std::make_unique<SectionList>();
295 
296   if (m_sect_infos.empty()) {
297     DecodeSections();
298   }
299 
300   for (const section_info &sect_info : m_sect_infos) {
301     SectionType section_type = eSectionTypeOther;
302     ConstString section_name;
303     offset_t file_offset = sect_info.offset & 0xffffffff;
304     addr_t vm_addr = file_offset;
305     size_t vm_size = sect_info.size;
306 
307     if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
308       section_type = eSectionTypeCode;
309       section_name = ConstString("code");
310 
311       // A code address in DWARF for WebAssembly is the offset of an
312       // instruction relative within the Code section of the WebAssembly file.
313       // For this reason Section::GetFileAddress() must return zero for the
314       // Code section.
315       vm_addr = 0;
316     } else {
317       section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
318       if (section_type == eSectionTypeOther)
319         continue;
320       section_name = sect_info.name;
321       if (!IsInMemory()) {
322         vm_size = 0;
323         vm_addr = 0;
324       }
325     }
326 
327     SectionSP section_sp(
328         new Section(GetModule(), // Module to which this section belongs.
329                     this,        // ObjectFile to which this section belongs and
330                                  // should read section data from.
331                     section_type,   // Section ID.
332                     section_name,   // Section name.
333                     section_type,   // Section type.
334                     vm_addr,        // VM address.
335                     vm_size,        // VM size in bytes of this section.
336                     file_offset,    // Offset of this section in the file.
337                     sect_info.size, // Size of the section as found in the file.
338                     0,              // Alignment of the section
339                     0,              // Flags for this section.
340                     1));            // Number of host bytes per target byte
341     m_sections_up->AddSection(section_sp);
342     unified_section_list.AddSection(section_sp);
343   }
344 }
345 
346 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
347                                     bool value_is_offset) {
348   /// In WebAssembly, linear memory is disjointed from code space. The VM can
349   /// load multiple instances of a module, which logically share the same code.
350   /// We represent a wasm32 code address with 64-bits, like:
351   /// 63            32 31             0
352   /// +---------------+---------------+
353   /// +   module_id   |     offset    |
354   /// +---------------+---------------+
355   /// where the lower 32 bits represent a module offset (relative to the module
356   /// start not to the beginning of the code section) and the higher 32 bits
357   /// uniquely identify the module in the WebAssembly VM.
358   /// In other words, we assume that each WebAssembly module is loaded by the
359   /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
360   /// 0x0000000400000000 for module_id == 4.
361   /// These 64-bit addresses will be used to request code ranges for a specific
362   /// module from the WebAssembly engine.
363 
364   assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
365          m_memory_addr == load_address);
366 
367   ModuleSP module_sp = GetModule();
368   if (!module_sp)
369     return false;
370 
371   DecodeSections();
372 
373   size_t num_loaded_sections = 0;
374   SectionList *section_list = GetSectionList();
375   if (!section_list)
376     return false;
377 
378   const size_t num_sections = section_list->GetSize();
379   for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
380     SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
381     if (target.SetSectionLoadAddress(
382             section_sp, load_address | section_sp->GetFileOffset())) {
383       ++num_loaded_sections;
384     }
385   }
386 
387   return num_loaded_sections > 0;
388 }
389 
390 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
391   DataExtractor data;
392   if (m_file) {
393     if (offset < GetByteSize()) {
394       size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
395       auto buffer_sp = MapFileData(m_file, size, offset);
396       return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
397     }
398   } else {
399     ProcessSP process_sp(m_process_wp.lock());
400     if (process_sp) {
401       auto data_up = std::make_unique<DataBufferHeap>(size, 0);
402       Status readmem_error;
403       size_t bytes_read = process_sp->ReadMemory(
404           offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
405       if (bytes_read > 0) {
406         DataBufferSP buffer_sp(data_up.release());
407         data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
408       }
409     }
410   }
411 
412   data.SetByteOrder(GetByteOrder());
413   return data;
414 }
415 
416 std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
417   static ConstString g_sect_name_external_debug_info("external_debug_info");
418 
419   for (const section_info &sect_info : m_sect_infos) {
420     if (g_sect_name_external_debug_info == sect_info.name) {
421       const uint32_t kBufferSize = 1024;
422       DataExtractor section_header_data =
423           ReadImageData(sect_info.offset, kBufferSize);
424       llvm::DataExtractor data = section_header_data.GetAsLLVM();
425       llvm::DataExtractor::Cursor c(0);
426       std::optional<ConstString> symbols_url = GetWasmString(data, c);
427       if (symbols_url)
428         return FileSpec(symbols_url->GetStringRef());
429     }
430   }
431   return std::nullopt;
432 }
433 
434 void ObjectFileWasm::Dump(Stream *s) {
435   ModuleSP module_sp(GetModule());
436   if (!module_sp)
437     return;
438 
439   std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
440 
441   llvm::raw_ostream &ostream = s->AsRawOstream();
442   ostream << static_cast<void *>(this) << ": ";
443   s->Indent();
444   ostream << "ObjectFileWasm, file = '";
445   m_file.Dump(ostream);
446   ostream << "', arch = ";
447   ostream << GetArchitecture().GetArchitectureName() << "\n";
448 
449   SectionList *sections = GetSectionList();
450   if (sections) {
451     sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
452                    UINT32_MAX);
453   }
454   ostream << "\n";
455   DumpSectionHeaders(ostream);
456   ostream << "\n";
457 }
458 
459 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
460                                        const section_info_t &sh) {
461   ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
462           << llvm::format_hex(sh.offset, 10) << " "
463           << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
464           << "\n";
465 }
466 
467 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
468   ostream << "Section Headers\n";
469   ostream << "IDX  name             addr       size       id\n";
470   ostream << "==== ---------------- ---------- ---------- ------\n";
471 
472   uint32_t idx = 0;
473   for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
474        ++pos, ++idx) {
475     ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
476     ObjectFileWasm::DumpSectionHeader(ostream, *pos);
477   }
478 }
479