1 //===-- ObjectFileWasm.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ObjectFileWasm.h" 10 #include "lldb/Core/Module.h" 11 #include "lldb/Core/ModuleSpec.h" 12 #include "lldb/Core/PluginManager.h" 13 #include "lldb/Core/Section.h" 14 #include "lldb/Target/Process.h" 15 #include "lldb/Target/SectionLoadList.h" 16 #include "lldb/Target/Target.h" 17 #include "lldb/Utility/DataBufferHeap.h" 18 #include "lldb/Utility/Log.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/BinaryFormat/Magic.h" 23 #include "llvm/BinaryFormat/Wasm.h" 24 #include "llvm/Support/Endian.h" 25 #include "llvm/Support/Format.h" 26 27 using namespace lldb; 28 using namespace lldb_private; 29 using namespace lldb_private::wasm; 30 31 LLDB_PLUGIN_DEFINE(ObjectFileWasm) 32 33 static const uint32_t kWasmHeaderSize = 34 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); 35 36 /// Checks whether the data buffer starts with a valid Wasm module header. 37 static bool ValidateModuleHeader(const DataBufferSP &data_sp) { 38 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) 39 return false; 40 41 if (llvm::identify_magic(toStringRef(data_sp->GetData())) != 42 llvm::file_magic::wasm_object) 43 return false; 44 45 uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); 46 47 uint32_t version = llvm::support::endian::read32le(Ptr); 48 return version == llvm::wasm::WasmVersion; 49 } 50 51 static llvm::Optional<ConstString> 52 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { 53 // A Wasm string is encoded as a vector of UTF-8 codes. 54 // Vectors are encoded with their u32 length followed by the element 55 // sequence. 56 uint64_t len = data.getULEB128(c); 57 if (!c) { 58 consumeError(c.takeError()); 59 return llvm::None; 60 } 61 62 if (len >= (uint64_t(1) << 32)) { 63 return llvm::None; 64 } 65 66 llvm::SmallVector<uint8_t, 32> str_storage; 67 data.getU8(c, str_storage, len); 68 if (!c) { 69 consumeError(c.takeError()); 70 return llvm::None; 71 } 72 73 llvm::StringRef str = toStringRef(makeArrayRef(str_storage)); 74 return ConstString(str); 75 } 76 77 char ObjectFileWasm::ID; 78 79 void ObjectFileWasm::Initialize() { 80 PluginManager::RegisterPlugin(GetPluginNameStatic(), 81 GetPluginDescriptionStatic(), CreateInstance, 82 CreateMemoryInstance, GetModuleSpecifications); 83 } 84 85 void ObjectFileWasm::Terminate() { 86 PluginManager::UnregisterPlugin(CreateInstance); 87 } 88 89 ConstString ObjectFileWasm::GetPluginNameStatic() { 90 static ConstString g_name("wasm"); 91 return g_name; 92 } 93 94 ObjectFile * 95 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp, 96 offset_t data_offset, const FileSpec *file, 97 offset_t file_offset, offset_t length) { 98 Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT)); 99 100 if (!data_sp) { 101 data_sp = MapFileData(*file, length, file_offset); 102 if (!data_sp) { 103 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s", 104 file->GetPath().c_str()); 105 return nullptr; 106 } 107 data_offset = 0; 108 } 109 110 assert(data_sp); 111 if (!ValidateModuleHeader(data_sp)) { 112 LLDB_LOGF(log, 113 "Failed to create ObjectFileWasm instance: invalid Wasm header"); 114 return nullptr; 115 } 116 117 // Update the data to contain the entire file if it doesn't contain it 118 // already. 119 if (data_sp->GetByteSize() < length) { 120 data_sp = MapFileData(*file, length, file_offset); 121 if (!data_sp) { 122 LLDB_LOGF(log, 123 "Failed to create ObjectFileWasm instance: cannot read file %s", 124 file->GetPath().c_str()); 125 return nullptr; 126 } 127 data_offset = 0; 128 } 129 130 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( 131 module_sp, data_sp, data_offset, file, file_offset, length)); 132 ArchSpec spec = objfile_up->GetArchitecture(); 133 if (spec && objfile_up->SetModulesArchitecture(spec)) { 134 LLDB_LOGF(log, 135 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s", 136 static_cast<void *>(objfile_up.get()), 137 static_cast<void *>(objfile_up->GetModule().get()), 138 objfile_up->GetModule()->GetSpecificationDescription().c_str(), 139 file ? file->GetPath().c_str() : "<NULL>"); 140 return objfile_up.release(); 141 } 142 143 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance"); 144 return nullptr; 145 } 146 147 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, 148 DataBufferSP &data_sp, 149 const ProcessSP &process_sp, 150 addr_t header_addr) { 151 if (!ValidateModuleHeader(data_sp)) 152 return nullptr; 153 154 std::unique_ptr<ObjectFileWasm> objfile_up( 155 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); 156 ArchSpec spec = objfile_up->GetArchitecture(); 157 if (spec && objfile_up->SetModulesArchitecture(spec)) 158 return objfile_up.release(); 159 return nullptr; 160 } 161 162 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { 163 // Buffer sufficient to read a section header and find the pointer to the next 164 // section. 165 const uint32_t kBufferSize = 1024; 166 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize); 167 168 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 169 llvm::DataExtractor::Cursor c(0); 170 171 // Each section consists of: 172 // - a one-byte section id, 173 // - the u32 size of the contents, in bytes, 174 // - the actual contents. 175 uint8_t section_id = data.getU8(c); 176 uint64_t payload_len = data.getULEB128(c); 177 if (!c) 178 return !llvm::errorToBool(c.takeError()); 179 180 if (payload_len >= (uint64_t(1) << 32)) 181 return false; 182 183 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { 184 // Custom sections have the id 0. Their contents consist of a name 185 // identifying the custom section, followed by an uninterpreted sequence 186 // of bytes. 187 lldb::offset_t prev_offset = c.tell(); 188 llvm::Optional<ConstString> sect_name = GetWasmString(data, c); 189 if (!sect_name) 190 return false; 191 192 if (payload_len < c.tell() - prev_offset) 193 return false; 194 195 uint32_t section_length = payload_len - (c.tell() - prev_offset); 196 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, 197 section_id, *sect_name}); 198 *offset_ptr += (c.tell() + section_length); 199 } else if (section_id <= llvm::wasm::WASM_SEC_EVENT) { 200 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), 201 static_cast<uint32_t>(payload_len), 202 section_id, ConstString()}); 203 *offset_ptr += (c.tell() + payload_len); 204 } else { 205 // Invalid section id. 206 return false; 207 } 208 return true; 209 } 210 211 bool ObjectFileWasm::DecodeSections() { 212 lldb::offset_t offset = kWasmHeaderSize; 213 if (IsInMemory()) { 214 offset += m_memory_addr; 215 } 216 217 while (DecodeNextSection(&offset)) 218 ; 219 return true; 220 } 221 222 size_t ObjectFileWasm::GetModuleSpecifications( 223 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, 224 offset_t file_offset, offset_t length, ModuleSpecList &specs) { 225 if (!ValidateModuleHeader(data_sp)) { 226 return 0; 227 } 228 229 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm")); 230 specs.Append(spec); 231 return 1; 232 } 233 234 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp, 235 offset_t data_offset, const FileSpec *file, 236 offset_t offset, offset_t length) 237 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), 238 m_arch("wasm32-unknown-unknown-wasm") { 239 m_data.SetAddressByteSize(4); 240 } 241 242 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, 243 lldb::DataBufferSP &header_data_sp, 244 const lldb::ProcessSP &process_sp, 245 lldb::addr_t header_addr) 246 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), 247 m_arch("wasm32-unknown-unknown-wasm") {} 248 249 bool ObjectFileWasm::ParseHeader() { 250 // We already parsed the header during initialization. 251 return true; 252 } 253 254 Symtab *ObjectFileWasm::GetSymtab() { return nullptr; } 255 256 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { 257 if (m_sections_up) 258 return; 259 260 m_sections_up = std::make_unique<SectionList>(); 261 262 if (m_sect_infos.empty()) { 263 DecodeSections(); 264 } 265 266 for (const section_info §_info : m_sect_infos) { 267 SectionType section_type = eSectionTypeOther; 268 ConstString section_name; 269 offset_t file_offset = sect_info.offset & 0xffffffff; 270 addr_t vm_addr = file_offset; 271 size_t vm_size = sect_info.size; 272 273 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { 274 section_type = eSectionTypeCode; 275 section_name = ConstString("code"); 276 277 // A code address in DWARF for WebAssembly is the offset of an 278 // instruction relative within the Code section of the WebAssembly file. 279 // For this reason Section::GetFileAddress() must return zero for the 280 // Code section. 281 vm_addr = 0; 282 } else { 283 section_type = 284 llvm::StringSwitch<SectionType>(sect_info.name.GetStringRef()) 285 .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev) 286 .Case(".debug_addr", eSectionTypeDWARFDebugAddr) 287 .Case(".debug_aranges", eSectionTypeDWARFDebugAranges) 288 .Case(".debug_cu_index", eSectionTypeDWARFDebugCuIndex) 289 .Case(".debug_frame", eSectionTypeDWARFDebugFrame) 290 .Case(".debug_info", eSectionTypeDWARFDebugInfo) 291 .Case(".debug_line", eSectionTypeDWARFDebugLine) 292 .Case(".debug_line_str", eSectionTypeDWARFDebugLineStr) 293 .Case(".debug_loc", eSectionTypeDWARFDebugLoc) 294 .Case(".debug_loclists", eSectionTypeDWARFDebugLocLists) 295 .Case(".debug_macinfo", eSectionTypeDWARFDebugMacInfo) 296 .Case(".debug_macro", eSectionTypeDWARFDebugMacro) 297 .Case(".debug_names", eSectionTypeDWARFDebugNames) 298 .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames) 299 .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes) 300 .Case(".debug_ranges", eSectionTypeDWARFDebugRanges) 301 .Case(".debug_rnglists", eSectionTypeDWARFDebugRngLists) 302 .Case(".debug_str", eSectionTypeDWARFDebugStr) 303 .Case(".debug_str_offsets", eSectionTypeDWARFDebugStrOffsets) 304 .Case(".debug_types", eSectionTypeDWARFDebugTypes) 305 .Default(eSectionTypeOther); 306 if (section_type == eSectionTypeOther) 307 continue; 308 section_name = sect_info.name; 309 if (!IsInMemory()) { 310 vm_size = 0; 311 vm_addr = 0; 312 } 313 } 314 315 SectionSP section_sp( 316 new Section(GetModule(), // Module to which this section belongs. 317 this, // ObjectFile to which this section belongs and 318 // should read section data from. 319 section_type, // Section ID. 320 section_name, // Section name. 321 section_type, // Section type. 322 vm_addr, // VM address. 323 vm_size, // VM size in bytes of this section. 324 file_offset, // Offset of this section in the file. 325 sect_info.size, // Size of the section as found in the file. 326 0, // Alignment of the section 327 0, // Flags for this section. 328 1)); // Number of host bytes per target byte 329 m_sections_up->AddSection(section_sp); 330 unified_section_list.AddSection(section_sp); 331 } 332 } 333 334 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, 335 bool value_is_offset) { 336 /// In WebAssembly, linear memory is disjointed from code space. The VM can 337 /// load multiple instances of a module, which logically share the same code. 338 /// We represent a wasm32 code address with 64-bits, like: 339 /// 63 32 31 0 340 /// +---------------+---------------+ 341 /// + module_id | offset | 342 /// +---------------+---------------+ 343 /// where the lower 32 bits represent a module offset (relative to the module 344 /// start not to the beginning of the code section) and the higher 32 bits 345 /// uniquely identify the module in the WebAssembly VM. 346 /// In other words, we assume that each WebAssembly module is loaded by the 347 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like 348 /// 0x0000000400000000 for module_id == 4. 349 /// These 64-bit addresses will be used to request code ranges for a specific 350 /// module from the WebAssembly engine. 351 352 assert(m_memory_addr == LLDB_INVALID_ADDRESS || 353 m_memory_addr == load_address); 354 355 ModuleSP module_sp = GetModule(); 356 if (!module_sp) 357 return false; 358 359 DecodeSections(); 360 361 size_t num_loaded_sections = 0; 362 SectionList *section_list = GetSectionList(); 363 if (!section_list) 364 return false; 365 366 const size_t num_sections = section_list->GetSize(); 367 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 368 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 369 if (target.SetSectionLoadAddress( 370 section_sp, load_address | section_sp->GetFileOffset())) { 371 ++num_loaded_sections; 372 } 373 } 374 375 return num_loaded_sections > 0; 376 } 377 378 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { 379 DataExtractor data; 380 if (m_file) { 381 if (offset < GetByteSize()) { 382 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset); 383 auto buffer_sp = MapFileData(m_file, size, offset); 384 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); 385 } 386 } else { 387 ProcessSP process_sp(m_process_wp.lock()); 388 if (process_sp) { 389 auto data_up = std::make_unique<DataBufferHeap>(size, 0); 390 Status readmem_error; 391 size_t bytes_read = process_sp->ReadMemory( 392 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error); 393 if (bytes_read > 0) { 394 DataBufferSP buffer_sp(data_up.release()); 395 data.SetData(buffer_sp, 0, buffer_sp->GetByteSize()); 396 } 397 } 398 } 399 400 data.SetByteOrder(GetByteOrder()); 401 return data; 402 } 403 404 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { 405 static ConstString g_sect_name_external_debug_info("external_debug_info"); 406 407 for (const section_info §_info : m_sect_infos) { 408 if (g_sect_name_external_debug_info == sect_info.name) { 409 const uint32_t kBufferSize = 1024; 410 DataExtractor section_header_data = 411 ReadImageData(sect_info.offset, kBufferSize); 412 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 413 llvm::DataExtractor::Cursor c(0); 414 llvm::Optional<ConstString> symbols_url = GetWasmString(data, c); 415 if (symbols_url) 416 return FileSpec(symbols_url->GetStringRef()); 417 } 418 } 419 return llvm::None; 420 } 421 422 void ObjectFileWasm::Dump(Stream *s) { 423 ModuleSP module_sp(GetModule()); 424 if (!module_sp) 425 return; 426 427 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 428 429 llvm::raw_ostream &ostream = s->AsRawOstream(); 430 ostream << static_cast<void *>(this) << ": "; 431 s->Indent(); 432 ostream << "ObjectFileWasm, file = '"; 433 m_file.Dump(ostream); 434 ostream << "', arch = "; 435 ostream << GetArchitecture().GetArchitectureName() << "\n"; 436 437 SectionList *sections = GetSectionList(); 438 if (sections) { 439 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, 440 UINT32_MAX); 441 } 442 ostream << "\n"; 443 DumpSectionHeaders(ostream); 444 ostream << "\n"; 445 } 446 447 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream, 448 const section_info_t &sh) { 449 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " " 450 << llvm::format_hex(sh.offset, 10) << " " 451 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6) 452 << "\n"; 453 } 454 455 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) { 456 ostream << "Section Headers\n"; 457 ostream << "IDX name addr size id\n"; 458 ostream << "==== ---------------- ---------- ---------- ------\n"; 459 460 uint32_t idx = 0; 461 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); 462 ++pos, ++idx) { 463 ostream << "[" << llvm::format_decimal(idx, 2) << "] "; 464 ObjectFileWasm::DumpSectionHeader(ostream, *pos); 465 } 466 } 467