1 //===-- ObjectFileWasm.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ObjectFileWasm.h" 10 #include "lldb/Core/Module.h" 11 #include "lldb/Core/ModuleSpec.h" 12 #include "lldb/Core/PluginManager.h" 13 #include "lldb/Core/Section.h" 14 #include "lldb/Target/Process.h" 15 #include "lldb/Target/SectionLoadList.h" 16 #include "lldb/Target/Target.h" 17 #include "lldb/Utility/DataBufferHeap.h" 18 #include "lldb/Utility/LLDBLog.h" 19 #include "lldb/Utility/Log.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/BinaryFormat/Magic.h" 24 #include "llvm/BinaryFormat/Wasm.h" 25 #include "llvm/Support/Endian.h" 26 #include "llvm/Support/Format.h" 27 28 using namespace lldb; 29 using namespace lldb_private; 30 using namespace lldb_private::wasm; 31 32 LLDB_PLUGIN_DEFINE(ObjectFileWasm) 33 34 static const uint32_t kWasmHeaderSize = 35 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); 36 37 /// Checks whether the data buffer starts with a valid Wasm module header. 38 static bool ValidateModuleHeader(const DataBufferSP &data_sp) { 39 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) 40 return false; 41 42 if (llvm::identify_magic(toStringRef(data_sp->GetData())) != 43 llvm::file_magic::wasm_object) 44 return false; 45 46 const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); 47 48 uint32_t version = llvm::support::endian::read32le(Ptr); 49 return version == llvm::wasm::WasmVersion; 50 } 51 52 static llvm::Optional<ConstString> 53 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { 54 // A Wasm string is encoded as a vector of UTF-8 codes. 55 // Vectors are encoded with their u32 length followed by the element 56 // sequence. 57 uint64_t len = data.getULEB128(c); 58 if (!c) { 59 consumeError(c.takeError()); 60 return llvm::None; 61 } 62 63 if (len >= (uint64_t(1) << 32)) { 64 return llvm::None; 65 } 66 67 llvm::SmallVector<uint8_t, 32> str_storage; 68 data.getU8(c, str_storage, len); 69 if (!c) { 70 consumeError(c.takeError()); 71 return llvm::None; 72 } 73 74 llvm::StringRef str = toStringRef(makeArrayRef(str_storage)); 75 return ConstString(str); 76 } 77 78 char ObjectFileWasm::ID; 79 80 void ObjectFileWasm::Initialize() { 81 PluginManager::RegisterPlugin(GetPluginNameStatic(), 82 GetPluginDescriptionStatic(), CreateInstance, 83 CreateMemoryInstance, GetModuleSpecifications); 84 } 85 86 void ObjectFileWasm::Terminate() { 87 PluginManager::UnregisterPlugin(CreateInstance); 88 } 89 90 ObjectFile * 91 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, 92 offset_t data_offset, const FileSpec *file, 93 offset_t file_offset, offset_t length) { 94 Log *log = GetLog(LLDBLog::Object); 95 96 if (!data_sp) { 97 data_sp = MapFileData(*file, length, file_offset); 98 if (!data_sp) { 99 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s", 100 file->GetPath().c_str()); 101 return nullptr; 102 } 103 data_offset = 0; 104 } 105 106 assert(data_sp); 107 if (!ValidateModuleHeader(data_sp)) { 108 LLDB_LOGF(log, 109 "Failed to create ObjectFileWasm instance: invalid Wasm header"); 110 return nullptr; 111 } 112 113 // Update the data to contain the entire file if it doesn't contain it 114 // already. 115 if (data_sp->GetByteSize() < length) { 116 data_sp = MapFileData(*file, length, file_offset); 117 if (!data_sp) { 118 LLDB_LOGF(log, 119 "Failed to create ObjectFileWasm instance: cannot read file %s", 120 file->GetPath().c_str()); 121 return nullptr; 122 } 123 data_offset = 0; 124 } 125 126 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( 127 module_sp, data_sp, data_offset, file, file_offset, length)); 128 ArchSpec spec = objfile_up->GetArchitecture(); 129 if (spec && objfile_up->SetModulesArchitecture(spec)) { 130 LLDB_LOGF(log, 131 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s", 132 static_cast<void *>(objfile_up.get()), 133 static_cast<void *>(objfile_up->GetModule().get()), 134 objfile_up->GetModule()->GetSpecificationDescription().c_str(), 135 file ? file->GetPath().c_str() : "<NULL>"); 136 return objfile_up.release(); 137 } 138 139 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance"); 140 return nullptr; 141 } 142 143 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, 144 WritableDataBufferSP data_sp, 145 const ProcessSP &process_sp, 146 addr_t header_addr) { 147 if (!ValidateModuleHeader(data_sp)) 148 return nullptr; 149 150 std::unique_ptr<ObjectFileWasm> objfile_up( 151 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); 152 ArchSpec spec = objfile_up->GetArchitecture(); 153 if (spec && objfile_up->SetModulesArchitecture(spec)) 154 return objfile_up.release(); 155 return nullptr; 156 } 157 158 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { 159 // Buffer sufficient to read a section header and find the pointer to the next 160 // section. 161 const uint32_t kBufferSize = 1024; 162 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize); 163 164 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 165 llvm::DataExtractor::Cursor c(0); 166 167 // Each section consists of: 168 // - a one-byte section id, 169 // - the u32 size of the contents, in bytes, 170 // - the actual contents. 171 uint8_t section_id = data.getU8(c); 172 uint64_t payload_len = data.getULEB128(c); 173 if (!c) 174 return !llvm::errorToBool(c.takeError()); 175 176 if (payload_len >= (uint64_t(1) << 32)) 177 return false; 178 179 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { 180 // Custom sections have the id 0. Their contents consist of a name 181 // identifying the custom section, followed by an uninterpreted sequence 182 // of bytes. 183 lldb::offset_t prev_offset = c.tell(); 184 llvm::Optional<ConstString> sect_name = GetWasmString(data, c); 185 if (!sect_name) 186 return false; 187 188 if (payload_len < c.tell() - prev_offset) 189 return false; 190 191 uint32_t section_length = payload_len - (c.tell() - prev_offset); 192 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, 193 section_id, *sect_name}); 194 *offset_ptr += (c.tell() + section_length); 195 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) { 196 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), 197 static_cast<uint32_t>(payload_len), 198 section_id, ConstString()}); 199 *offset_ptr += (c.tell() + payload_len); 200 } else { 201 // Invalid section id. 202 return false; 203 } 204 return true; 205 } 206 207 bool ObjectFileWasm::DecodeSections() { 208 lldb::offset_t offset = kWasmHeaderSize; 209 if (IsInMemory()) { 210 offset += m_memory_addr; 211 } 212 213 while (DecodeNextSection(&offset)) 214 ; 215 return true; 216 } 217 218 size_t ObjectFileWasm::GetModuleSpecifications( 219 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, 220 offset_t file_offset, offset_t length, ModuleSpecList &specs) { 221 if (!ValidateModuleHeader(data_sp)) { 222 return 0; 223 } 224 225 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm")); 226 specs.Append(spec); 227 return 1; 228 } 229 230 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp, 231 offset_t data_offset, const FileSpec *file, 232 offset_t offset, offset_t length) 233 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), 234 m_arch("wasm32-unknown-unknown-wasm") { 235 m_data.SetAddressByteSize(4); 236 } 237 238 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, 239 lldb::WritableDataBufferSP header_data_sp, 240 const lldb::ProcessSP &process_sp, 241 lldb::addr_t header_addr) 242 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), 243 m_arch("wasm32-unknown-unknown-wasm") {} 244 245 bool ObjectFileWasm::ParseHeader() { 246 // We already parsed the header during initialization. 247 return true; 248 } 249 250 void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} 251 252 static SectionType GetSectionTypeFromName(llvm::StringRef Name) { 253 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) { 254 return llvm::StringSwitch<SectionType>(Name) 255 .Case("abbrev", eSectionTypeDWARFDebugAbbrev) 256 .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo) 257 .Case("addr", eSectionTypeDWARFDebugAddr) 258 .Case("aranges", eSectionTypeDWARFDebugAranges) 259 .Case("cu_index", eSectionTypeDWARFDebugCuIndex) 260 .Case("frame", eSectionTypeDWARFDebugFrame) 261 .Case("info", eSectionTypeDWARFDebugInfo) 262 .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo) 263 .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine) 264 .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr) 265 .Case("loc", eSectionTypeDWARFDebugLoc) 266 .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo) 267 .Case("loclists", eSectionTypeDWARFDebugLocLists) 268 .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo) 269 .Case("macinfo", eSectionTypeDWARFDebugMacInfo) 270 .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro) 271 .Case("names", eSectionTypeDWARFDebugNames) 272 .Case("pubnames", eSectionTypeDWARFDebugPubNames) 273 .Case("pubtypes", eSectionTypeDWARFDebugPubTypes) 274 .Case("ranges", eSectionTypeDWARFDebugRanges) 275 .Case("rnglists", eSectionTypeDWARFDebugRngLists) 276 .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo) 277 .Case("str", eSectionTypeDWARFDebugStr) 278 .Case("str.dwo", eSectionTypeDWARFDebugStrDwo) 279 .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets) 280 .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo) 281 .Case("tu_index", eSectionTypeDWARFDebugTuIndex) 282 .Case("types", eSectionTypeDWARFDebugTypes) 283 .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo) 284 .Default(eSectionTypeOther); 285 } 286 return eSectionTypeOther; 287 } 288 289 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { 290 if (m_sections_up) 291 return; 292 293 m_sections_up = std::make_unique<SectionList>(); 294 295 if (m_sect_infos.empty()) { 296 DecodeSections(); 297 } 298 299 for (const section_info §_info : m_sect_infos) { 300 SectionType section_type = eSectionTypeOther; 301 ConstString section_name; 302 offset_t file_offset = sect_info.offset & 0xffffffff; 303 addr_t vm_addr = file_offset; 304 size_t vm_size = sect_info.size; 305 306 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { 307 section_type = eSectionTypeCode; 308 section_name = ConstString("code"); 309 310 // A code address in DWARF for WebAssembly is the offset of an 311 // instruction relative within the Code section of the WebAssembly file. 312 // For this reason Section::GetFileAddress() must return zero for the 313 // Code section. 314 vm_addr = 0; 315 } else { 316 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef()); 317 if (section_type == eSectionTypeOther) 318 continue; 319 section_name = sect_info.name; 320 if (!IsInMemory()) { 321 vm_size = 0; 322 vm_addr = 0; 323 } 324 } 325 326 SectionSP section_sp( 327 new Section(GetModule(), // Module to which this section belongs. 328 this, // ObjectFile to which this section belongs and 329 // should read section data from. 330 section_type, // Section ID. 331 section_name, // Section name. 332 section_type, // Section type. 333 vm_addr, // VM address. 334 vm_size, // VM size in bytes of this section. 335 file_offset, // Offset of this section in the file. 336 sect_info.size, // Size of the section as found in the file. 337 0, // Alignment of the section 338 0, // Flags for this section. 339 1)); // Number of host bytes per target byte 340 m_sections_up->AddSection(section_sp); 341 unified_section_list.AddSection(section_sp); 342 } 343 } 344 345 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, 346 bool value_is_offset) { 347 /// In WebAssembly, linear memory is disjointed from code space. The VM can 348 /// load multiple instances of a module, which logically share the same code. 349 /// We represent a wasm32 code address with 64-bits, like: 350 /// 63 32 31 0 351 /// +---------------+---------------+ 352 /// + module_id | offset | 353 /// +---------------+---------------+ 354 /// where the lower 32 bits represent a module offset (relative to the module 355 /// start not to the beginning of the code section) and the higher 32 bits 356 /// uniquely identify the module in the WebAssembly VM. 357 /// In other words, we assume that each WebAssembly module is loaded by the 358 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like 359 /// 0x0000000400000000 for module_id == 4. 360 /// These 64-bit addresses will be used to request code ranges for a specific 361 /// module from the WebAssembly engine. 362 363 assert(m_memory_addr == LLDB_INVALID_ADDRESS || 364 m_memory_addr == load_address); 365 366 ModuleSP module_sp = GetModule(); 367 if (!module_sp) 368 return false; 369 370 DecodeSections(); 371 372 size_t num_loaded_sections = 0; 373 SectionList *section_list = GetSectionList(); 374 if (!section_list) 375 return false; 376 377 const size_t num_sections = section_list->GetSize(); 378 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 379 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 380 if (target.SetSectionLoadAddress( 381 section_sp, load_address | section_sp->GetFileOffset())) { 382 ++num_loaded_sections; 383 } 384 } 385 386 return num_loaded_sections > 0; 387 } 388 389 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { 390 DataExtractor data; 391 if (m_file) { 392 if (offset < GetByteSize()) { 393 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset); 394 auto buffer_sp = MapFileData(m_file, size, offset); 395 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); 396 } 397 } else { 398 ProcessSP process_sp(m_process_wp.lock()); 399 if (process_sp) { 400 auto data_up = std::make_unique<DataBufferHeap>(size, 0); 401 Status readmem_error; 402 size_t bytes_read = process_sp->ReadMemory( 403 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error); 404 if (bytes_read > 0) { 405 DataBufferSP buffer_sp(data_up.release()); 406 data.SetData(buffer_sp, 0, buffer_sp->GetByteSize()); 407 } 408 } 409 } 410 411 data.SetByteOrder(GetByteOrder()); 412 return data; 413 } 414 415 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { 416 static ConstString g_sect_name_external_debug_info("external_debug_info"); 417 418 for (const section_info §_info : m_sect_infos) { 419 if (g_sect_name_external_debug_info == sect_info.name) { 420 const uint32_t kBufferSize = 1024; 421 DataExtractor section_header_data = 422 ReadImageData(sect_info.offset, kBufferSize); 423 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 424 llvm::DataExtractor::Cursor c(0); 425 llvm::Optional<ConstString> symbols_url = GetWasmString(data, c); 426 if (symbols_url) 427 return FileSpec(symbols_url->GetStringRef()); 428 } 429 } 430 return llvm::None; 431 } 432 433 void ObjectFileWasm::Dump(Stream *s) { 434 ModuleSP module_sp(GetModule()); 435 if (!module_sp) 436 return; 437 438 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 439 440 llvm::raw_ostream &ostream = s->AsRawOstream(); 441 ostream << static_cast<void *>(this) << ": "; 442 s->Indent(); 443 ostream << "ObjectFileWasm, file = '"; 444 m_file.Dump(ostream); 445 ostream << "', arch = "; 446 ostream << GetArchitecture().GetArchitectureName() << "\n"; 447 448 SectionList *sections = GetSectionList(); 449 if (sections) { 450 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, 451 UINT32_MAX); 452 } 453 ostream << "\n"; 454 DumpSectionHeaders(ostream); 455 ostream << "\n"; 456 } 457 458 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream, 459 const section_info_t &sh) { 460 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " " 461 << llvm::format_hex(sh.offset, 10) << " " 462 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6) 463 << "\n"; 464 } 465 466 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) { 467 ostream << "Section Headers\n"; 468 ostream << "IDX name addr size id\n"; 469 ostream << "==== ---------------- ---------- ---------- ------\n"; 470 471 uint32_t idx = 0; 472 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); 473 ++pos, ++idx) { 474 ostream << "[" << llvm::format_decimal(idx, 2) << "] "; 475 ObjectFileWasm::DumpSectionHeader(ostream, *pos); 476 } 477 } 478