1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/LLDBLog.h"
19 #include "lldb/Utility/Log.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/BinaryFormat/Magic.h"
24 #include "llvm/BinaryFormat/Wasm.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Format.h"
27 
28 using namespace lldb;
29 using namespace lldb_private;
30 using namespace lldb_private::wasm;
31 
32 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
33 
/// Size in bytes of the Wasm module preamble: the magic bytes followed by the
/// 32-bit version field. Section decoding starts at this offset.
static const uint32_t kWasmHeaderSize =
    sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
36 
37 /// Checks whether the data buffer starts with a valid Wasm module header.
38 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
39   if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
40     return false;
41 
42   if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
43       llvm::file_magic::wasm_object)
44     return false;
45 
46   const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
47 
48   uint32_t version = llvm::support::endian::read32le(Ptr);
49   return version == llvm::wasm::WasmVersion;
50 }
51 
52 static llvm::Optional<ConstString>
53 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
54   // A Wasm string is encoded as a vector of UTF-8 codes.
55   // Vectors are encoded with their u32 length followed by the element
56   // sequence.
57   uint64_t len = data.getULEB128(c);
58   if (!c) {
59     consumeError(c.takeError());
60     return llvm::None;
61   }
62 
63   if (len >= (uint64_t(1) << 32)) {
64     return llvm::None;
65   }
66 
67   llvm::SmallVector<uint8_t, 32> str_storage;
68   data.getU8(c, str_storage, len);
69   if (!c) {
70     consumeError(c.takeError());
71     return llvm::None;
72   }
73 
74   llvm::StringRef str = toStringRef(makeArrayRef(str_storage));
75   return ConstString(str);
76 }
77 
// Out-of-line definition of the static member ObjectFileWasm::ID.
// NOTE(review): presumably its address serves as the class's type identifier
// (LLVM-style RTTI) -- confirm against the declaration in ObjectFileWasm.h.
char ObjectFileWasm::ID;
79 
/// Registers this plugin with the PluginManager, passing the static plugin
/// name and description together with the file-based factory
/// (CreateInstance), the memory-based factory (CreateMemoryInstance) and the
/// module-specification callback (GetModuleSpecifications).
void ObjectFileWasm::Initialize() {
  PluginManager::RegisterPlugin(GetPluginNameStatic(),
                                GetPluginDescriptionStatic(), CreateInstance,
                                CreateMemoryInstance, GetModuleSpecifications);
}
85 
/// Unregisters this plugin from the PluginManager; the plugin is identified
/// by its CreateInstance callback.
void ObjectFileWasm::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
89 
90 ObjectFile *
91 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp,
92                                offset_t data_offset, const FileSpec *file,
93                                offset_t file_offset, offset_t length) {
94   Log *log = GetLog(LLDBLog::Object);
95 
96   if (!data_sp) {
97     data_sp = MapFileData(*file, length, file_offset);
98     if (!data_sp) {
99       LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
100                 file->GetPath().c_str());
101       return nullptr;
102     }
103     data_offset = 0;
104   }
105 
106   assert(data_sp);
107   if (!ValidateModuleHeader(data_sp)) {
108     LLDB_LOGF(log,
109               "Failed to create ObjectFileWasm instance: invalid Wasm header");
110     return nullptr;
111   }
112 
113   // Update the data to contain the entire file if it doesn't contain it
114   // already.
115   if (data_sp->GetByteSize() < length) {
116     data_sp = MapFileData(*file, length, file_offset);
117     if (!data_sp) {
118       LLDB_LOGF(log,
119                 "Failed to create ObjectFileWasm instance: cannot read file %s",
120                 file->GetPath().c_str());
121       return nullptr;
122     }
123     data_offset = 0;
124   }
125 
126   std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
127       module_sp, data_sp, data_offset, file, file_offset, length));
128   ArchSpec spec = objfile_up->GetArchitecture();
129   if (spec && objfile_up->SetModulesArchitecture(spec)) {
130     LLDB_LOGF(log,
131               "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
132               static_cast<void *>(objfile_up.get()),
133               static_cast<void *>(objfile_up->GetModule().get()),
134               objfile_up->GetModule()->GetSpecificationDescription().c_str(),
135               file ? file->GetPath().c_str() : "<NULL>");
136     return objfile_up.release();
137   }
138 
139   LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
140   return nullptr;
141 }
142 
143 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
144                                                  WritableDataBufferSP data_sp,
145                                                  const ProcessSP &process_sp,
146                                                  addr_t header_addr) {
147   if (!ValidateModuleHeader(data_sp))
148     return nullptr;
149 
150   std::unique_ptr<ObjectFileWasm> objfile_up(
151       new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
152   ArchSpec spec = objfile_up->GetArchitecture();
153   if (spec && objfile_up->SetModulesArchitecture(spec))
154     return objfile_up.release();
155   return nullptr;
156 }
157 
158 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
159   // Buffer sufficient to read a section header and find the pointer to the next
160   // section.
161   const uint32_t kBufferSize = 1024;
162   DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
163 
164   llvm::DataExtractor data = section_header_data.GetAsLLVM();
165   llvm::DataExtractor::Cursor c(0);
166 
167   // Each section consists of:
168   // - a one-byte section id,
169   // - the u32 size of the contents, in bytes,
170   // - the actual contents.
171   uint8_t section_id = data.getU8(c);
172   uint64_t payload_len = data.getULEB128(c);
173   if (!c)
174     return !llvm::errorToBool(c.takeError());
175 
176   if (payload_len >= (uint64_t(1) << 32))
177     return false;
178 
179   if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
180     // Custom sections have the id 0. Their contents consist of a name
181     // identifying the custom section, followed by an uninterpreted sequence
182     // of bytes.
183     lldb::offset_t prev_offset = c.tell();
184     llvm::Optional<ConstString> sect_name = GetWasmString(data, c);
185     if (!sect_name)
186       return false;
187 
188     if (payload_len < c.tell() - prev_offset)
189       return false;
190 
191     uint32_t section_length = payload_len - (c.tell() - prev_offset);
192     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
193                                         section_id, *sect_name});
194     *offset_ptr += (c.tell() + section_length);
195   } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
196     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
197                                         static_cast<uint32_t>(payload_len),
198                                         section_id, ConstString()});
199     *offset_ptr += (c.tell() + payload_len);
200   } else {
201     // Invalid section id.
202     return false;
203   }
204   return true;
205 }
206 
207 bool ObjectFileWasm::DecodeSections() {
208   lldb::offset_t offset = kWasmHeaderSize;
209   if (IsInMemory()) {
210     offset += m_memory_addr;
211   }
212 
213   while (DecodeNextSection(&offset))
214     ;
215   return true;
216 }
217 
218 size_t ObjectFileWasm::GetModuleSpecifications(
219     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
220     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
221   if (!ValidateModuleHeader(data_sp)) {
222     return 0;
223   }
224 
225   ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
226   specs.Append(spec);
227   return 1;
228 }
229 
/// Constructs an object file backed by file data.
///
/// Forwards all arguments to the ObjectFile base class and fixes the
/// architecture to the wasm32 triple.
ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp,
                               offset_t data_offset, const FileSpec *file,
                               offset_t offset, offset_t length)
    : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
      m_arch("wasm32-unknown-unknown-wasm") {
  // Addresses are 4 bytes wide, matching the wasm32 architecture set above.
  m_data.SetAddressByteSize(4);
}
237 
/// Constructs an object file for a module residing in process memory at
/// \p header_addr.
///
/// Forwards all arguments to the ObjectFile base class and fixes the
/// architecture to the wasm32 triple.
/// NOTE(review): unlike the file-based constructor this one does not call
/// m_data.SetAddressByteSize(4) -- confirm whether that is intentional.
ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
                               lldb::WritableDataBufferSP header_data_sp,
                               const lldb::ProcessSP &process_sp,
                               lldb::addr_t header_addr)
    : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
      m_arch("wasm32-unknown-unknown-wasm") {}
244 
/// Reports the header as parsed without doing any work.
///
/// \return Always true.
bool ObjectFileWasm::ParseHeader() {
  // We already parsed the header during initialization.
  return true;
}
249 
/// Currently a no-op: the body is empty, so \p symtab is left untouched.
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
251 
252 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
253   if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
254     return llvm::StringSwitch<SectionType>(Name)
255         .Case("abbrev", eSectionTypeDWARFDebugAbbrev)
256         .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)
257         .Case("addr", eSectionTypeDWARFDebugAddr)
258         .Case("aranges", eSectionTypeDWARFDebugAranges)
259         .Case("cu_index", eSectionTypeDWARFDebugCuIndex)
260         .Case("frame", eSectionTypeDWARFDebugFrame)
261         .Case("info", eSectionTypeDWARFDebugInfo)
262         .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)
263         .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)
264         .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)
265         .Case("loc", eSectionTypeDWARFDebugLoc)
266         .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)
267         .Case("loclists", eSectionTypeDWARFDebugLocLists)
268         .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)
269         .Case("macinfo", eSectionTypeDWARFDebugMacInfo)
270         .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)
271         .Case("names", eSectionTypeDWARFDebugNames)
272         .Case("pubnames", eSectionTypeDWARFDebugPubNames)
273         .Case("pubtypes", eSectionTypeDWARFDebugPubTypes)
274         .Case("ranges", eSectionTypeDWARFDebugRanges)
275         .Case("rnglists", eSectionTypeDWARFDebugRngLists)
276         .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)
277         .Case("str", eSectionTypeDWARFDebugStr)
278         .Case("str.dwo", eSectionTypeDWARFDebugStrDwo)
279         .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)
280         .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)
281         .Case("tu_index", eSectionTypeDWARFDebugTuIndex)
282         .Case("types", eSectionTypeDWARFDebugTypes)
283         .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)
284         .Default(eSectionTypeOther);
285   }
286   return eSectionTypeOther;
287 }
288 
289 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
290   if (m_sections_up)
291     return;
292 
293   m_sections_up = std::make_unique<SectionList>();
294 
295   if (m_sect_infos.empty()) {
296     DecodeSections();
297   }
298 
299   for (const section_info &sect_info : m_sect_infos) {
300     SectionType section_type = eSectionTypeOther;
301     ConstString section_name;
302     offset_t file_offset = sect_info.offset & 0xffffffff;
303     addr_t vm_addr = file_offset;
304     size_t vm_size = sect_info.size;
305 
306     if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
307       section_type = eSectionTypeCode;
308       section_name = ConstString("code");
309 
310       // A code address in DWARF for WebAssembly is the offset of an
311       // instruction relative within the Code section of the WebAssembly file.
312       // For this reason Section::GetFileAddress() must return zero for the
313       // Code section.
314       vm_addr = 0;
315     } else {
316       section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
317       if (section_type == eSectionTypeOther)
318         continue;
319       section_name = sect_info.name;
320       if (!IsInMemory()) {
321         vm_size = 0;
322         vm_addr = 0;
323       }
324     }
325 
326     SectionSP section_sp(
327         new Section(GetModule(), // Module to which this section belongs.
328                     this,        // ObjectFile to which this section belongs and
329                                  // should read section data from.
330                     section_type,   // Section ID.
331                     section_name,   // Section name.
332                     section_type,   // Section type.
333                     vm_addr,        // VM address.
334                     vm_size,        // VM size in bytes of this section.
335                     file_offset,    // Offset of this section in the file.
336                     sect_info.size, // Size of the section as found in the file.
337                     0,              // Alignment of the section
338                     0,              // Flags for this section.
339                     1));            // Number of host bytes per target byte
340     m_sections_up->AddSection(section_sp);
341     unified_section_list.AddSection(section_sp);
342   }
343 }
344 
345 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
346                                     bool value_is_offset) {
347   /// In WebAssembly, linear memory is disjointed from code space. The VM can
348   /// load multiple instances of a module, which logically share the same code.
349   /// We represent a wasm32 code address with 64-bits, like:
350   /// 63            32 31             0
351   /// +---------------+---------------+
352   /// +   module_id   |     offset    |
353   /// +---------------+---------------+
354   /// where the lower 32 bits represent a module offset (relative to the module
355   /// start not to the beginning of the code section) and the higher 32 bits
356   /// uniquely identify the module in the WebAssembly VM.
357   /// In other words, we assume that each WebAssembly module is loaded by the
358   /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
359   /// 0x0000000400000000 for module_id == 4.
360   /// These 64-bit addresses will be used to request code ranges for a specific
361   /// module from the WebAssembly engine.
362 
363   assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
364          m_memory_addr == load_address);
365 
366   ModuleSP module_sp = GetModule();
367   if (!module_sp)
368     return false;
369 
370   DecodeSections();
371 
372   size_t num_loaded_sections = 0;
373   SectionList *section_list = GetSectionList();
374   if (!section_list)
375     return false;
376 
377   const size_t num_sections = section_list->GetSize();
378   for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
379     SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
380     if (target.SetSectionLoadAddress(
381             section_sp, load_address | section_sp->GetFileOffset())) {
382       ++num_loaded_sections;
383     }
384   }
385 
386   return num_loaded_sections > 0;
387 }
388 
389 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
390   DataExtractor data;
391   if (m_file) {
392     if (offset < GetByteSize()) {
393       size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
394       auto buffer_sp = MapFileData(m_file, size, offset);
395       return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
396     }
397   } else {
398     ProcessSP process_sp(m_process_wp.lock());
399     if (process_sp) {
400       auto data_up = std::make_unique<DataBufferHeap>(size, 0);
401       Status readmem_error;
402       size_t bytes_read = process_sp->ReadMemory(
403           offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
404       if (bytes_read > 0) {
405         DataBufferSP buffer_sp(data_up.release());
406         data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
407       }
408     }
409   }
410 
411   data.SetByteOrder(GetByteOrder());
412   return data;
413 }
414 
415 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
416   static ConstString g_sect_name_external_debug_info("external_debug_info");
417 
418   for (const section_info &sect_info : m_sect_infos) {
419     if (g_sect_name_external_debug_info == sect_info.name) {
420       const uint32_t kBufferSize = 1024;
421       DataExtractor section_header_data =
422           ReadImageData(sect_info.offset, kBufferSize);
423       llvm::DataExtractor data = section_header_data.GetAsLLVM();
424       llvm::DataExtractor::Cursor c(0);
425       llvm::Optional<ConstString> symbols_url = GetWasmString(data, c);
426       if (symbols_url)
427         return FileSpec(symbols_url->GetStringRef());
428     }
429   }
430   return llvm::None;
431 }
432 
433 void ObjectFileWasm::Dump(Stream *s) {
434   ModuleSP module_sp(GetModule());
435   if (!module_sp)
436     return;
437 
438   std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
439 
440   llvm::raw_ostream &ostream = s->AsRawOstream();
441   ostream << static_cast<void *>(this) << ": ";
442   s->Indent();
443   ostream << "ObjectFileWasm, file = '";
444   m_file.Dump(ostream);
445   ostream << "', arch = ";
446   ostream << GetArchitecture().GetArchitectureName() << "\n";
447 
448   SectionList *sections = GetSectionList();
449   if (sections) {
450     sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
451                    UINT32_MAX);
452   }
453   ostream << "\n";
454   DumpSectionHeaders(ostream);
455   ostream << "\n";
456 }
457 
458 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
459                                        const section_info_t &sh) {
460   ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
461           << llvm::format_hex(sh.offset, 10) << " "
462           << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
463           << "\n";
464 }
465 
466 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
467   ostream << "Section Headers\n";
468   ostream << "IDX  name             addr       size       id\n";
469   ostream << "==== ---------------- ---------- ---------- ------\n";
470 
471   uint32_t idx = 0;
472   for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
473        ++pos, ++idx) {
474     ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
475     ObjectFileWasm::DumpSectionHeader(ostream, *pos);
476   }
477 }
478