const std = @import("std.zig");
const io = std.io;
const mem = std.mem;
const os = std.os;
const File = std.fs.File;

const ArrayList = std.ArrayList;

// CoffHeader.machine values
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680313(v=vs.85).aspx
const IMAGE_FILE_MACHINE_I386 = 0x014c;
const IMAGE_FILE_MACHINE_IA64 = 0x0200;
const IMAGE_FILE_MACHINE_AMD64 = 0x8664;

pub const MachineType = enum(u16) {
    Unknown = 0x0,
    /// Matsushita AM33
    AM33 = 0x1d3,
    /// x64
    X64 = 0x8664,
    /// ARM little endian
    ARM = 0x1c0,
    /// ARM64 little endian
    ARM64 = 0xaa64,
    /// ARM Thumb-2 little endian
    ARMNT = 0x1c4,
    /// EFI byte code
    EBC = 0xebc,
    /// Intel 386 or later processors and compatible processors
    I386 = 0x14c,
    /// Intel Itanium processor family
    IA64 = 0x200,
    /// Mitsubishi M32R little endian
    M32R = 0x9041,
    /// MIPS16
    MIPS16 = 0x266,
    /// MIPS with FPU
    MIPSFPU = 0x366,
    /// MIPS16 with FPU
    MIPSFPU16 = 0x466,
    /// Power PC little endian
    POWERPC = 0x1f0,
    /// Power PC with floating point support
    POWERPCFP = 0x1f1,
    /// MIPS little endian
    R4000 = 0x166,
    /// RISC-V 32-bit address space
    RISCV32 = 0x5032,
    /// RISC-V 64-bit address space
    RISCV64 = 0x5064,
    /// RISC-V 128-bit address space
    RISCV128 = 0x5128,
    /// Hitachi SH3
    SH3 = 0x1a2,
    /// Hitachi SH3 DSP
    SH3DSP = 0x1a3,
    /// Hitachi SH4
    SH4 = 0x1a6,
    /// Hitachi SH5
    SH5 = 0x1a8,
    /// Thumb
    Thumb = 0x1c2,
    /// MIPS little-endian WCE v2
    WCEMIPSV2 = 0x169,
};

// OptionalHeader.magic values
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx
const IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b;
const IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b;

// Image Characteristics
pub const IMAGE_FILE_RELOCS_STRIPPED = 0x1;
pub const IMAGE_FILE_DEBUG_STRIPPED = 0x200;
pub const IMAGE_FILE_EXECUTABLE_IMAGE = 0x2;
pub const IMAGE_FILE_32BIT_MACHINE = 0x100;
pub const IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x20;

// Section flags
pub const IMAGE_SCN_CNT_INITIALIZED_DATA = 0x40;
pub const IMAGE_SCN_MEM_READ = 0x40000000;
pub const IMAGE_SCN_CNT_CODE = 0x20;
pub const IMAGE_SCN_MEM_EXECUTE = 0x20000000;
pub const IMAGE_SCN_MEM_WRITE = 0x80000000;

const IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16;
const IMAGE_DEBUG_TYPE_CODEVIEW = 2;
// Index of the debug directory inside OptionalHeader.data_directory.
const DEBUG_DIRECTORY = 6;

pub const CoffError = error{
    InvalidPEMagic,
    InvalidPEHeader,
    InvalidMachine,
    MissingCoffSection,
    MissingStringTable,
};

// Official documentation of the format: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
pub const Coff = struct {
    in_file: File,
    allocator: mem.Allocator,

    coff_header: CoffHeader,
    pe_header: OptionalHeader,
    sections: ArrayList(Section),

    guid: [16]u8,
    age: u32,

    /// Creates a parser over `in_file`. Nothing is read here: the header
    /// fields, `guid` and `age` stay undefined until `loadHeader` and
    /// `getPdbPath` fill them in. `allocator` backs the section list.
    pub fn init(allocator: mem.Allocator, in_file: File) Coff {
        return Coff{
            .in_file = in_file,
            .allocator = allocator,
            .coff_header = undefined,
            .pe_header = undefined,
            .sections = ArrayList(Section).init(allocator),
            .guid = undefined,
            .age = undefined,
        };
    }

    /// Releases the memory held by the section list. `in_file` is not closed.
    pub fn deinit(self: *Coff) void {
        self.sections.deinit();
    }

    /// Reads the "MZ" magic at the file's current position (expected to be
    /// the start of the image), follows the pointer stored at offset 0x3C to
    /// the "PE\0\0" signature, then parses the COFF header and the optional
    /// header.
    ///
    /// Errors: `InvalidPEMagic` / `InvalidPEHeader` on a bad signature,
    /// `InvalidMachine` for any machine other than I386, AMD64 or IA64, plus
    /// any I/O error from the underlying file.
    pub fn loadHeader(self: *Coff) !void {
        const pe_pointer_offset = 0x3C;

        const in = self.in_file.reader();

        var magic: [2]u8 = undefined;
        try in.readNoEof(magic[0..]);
        if (!mem.eql(u8, &magic, "MZ"))
            return error.InvalidPEMagic;

        // Seek to PE File Header (coff header)
        try self.in_file.seekTo(pe_pointer_offset);
        const pe_magic_offset = try in.readIntLittle(u32);
        try self.in_file.seekTo(pe_magic_offset);

        var pe_header_magic: [4]u8 = undefined;
        try in.readNoEof(pe_header_magic[0..]);
        if (!mem.eql(u8, &pe_header_magic, &[_]u8{ 'P', 'E', 0, 0 }))
            return error.InvalidPEHeader;

        // Field initializers run in source order, which is the on-disk order.
        self.coff_header = CoffHeader{
            .machine = try in.readIntLittle(u16),
            .number_of_sections = try in.readIntLittle(u16),
            .timedate_stamp = try in.readIntLittle(u32),
            .pointer_to_symbol_table = try in.readIntLittle(u32),
            .number_of_symbols = try in.readIntLittle(u32),
            .size_of_optional_header = try in.readIntLittle(u16),
            .characteristics = try in.readIntLittle(u16),
        };

        switch (self.coff_header.machine) {
            IMAGE_FILE_MACHINE_I386, IMAGE_FILE_MACHINE_AMD64, IMAGE_FILE_MACHINE_IA64 => {},
            else => return error.InvalidMachine,
        }

        try self.loadOptionalHeader();
    }

    /// Reads the NUL-terminated string stored `offset` bytes into the string
    /// table and returns it as a slice of `buf`. The file position is put
    /// back to where it was before the call.
    ///
    /// Errors: `MissingStringTable` when the header has no symbol table,
    /// `InvalidPEHeader` when the computed file offset overflows, plus any
    /// error from seeking or reading (including the string not fitting `buf`).
    fn readStringFromTable(self: *Coff, offset: usize, buf: []u8) ![]const u8 {
        if (self.coff_header.pointer_to_symbol_table == 0) {
            // No symbol table therefore no string table
            return error.MissingStringTable;
        }
        // The string table is at the end of the symbol table and symbols are 18 bytes long.
        // Both header fields are untrusted u32 values, so do the math in u64
        // where `ptr + count * 18` cannot overflow.
        const table_start = @as(u64, self.coff_header.pointer_to_symbol_table) +
            @as(u64, self.coff_header.number_of_symbols) * 18;
        const string_pos = std.math.add(u64, table_start, offset) catch return error.InvalidPEHeader;

        const in = self.in_file.reader();
        const old_pos = try self.in_file.getPos();

        try self.in_file.seekTo(string_pos);
        const str = in.readUntilDelimiterOrEof(buf, 0) catch |err| {
            // Best-effort restore; the read error is the one worth reporting.
            self.in_file.seekTo(old_pos) catch {};
            return err;
        };
        // Seeking is real I/O and may fail, so propagate instead of asserting.
        try self.in_file.seekTo(old_pos);

        return str orelse "";
    }

    /// Parses the parts of the optional header this module needs: the magic,
    /// the image base and the data directories. Must be called with the file
    /// positioned at the start of the optional header.
    fn loadOptionalHeader(self: *Coff) !void {
        const in = self.in_file.reader();
        const opt_header_pos = try self.in_file.getPos();

        self.pe_header.magic = try in.readIntLittle(u16);
        // All we care about is the image base value and PDB info
        // The header structure is different for 32 or 64 bit
        var num_rva_pos: u64 = undefined;
        if (self.pe_header.magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
            num_rva_pos = opt_header_pos + 92;

            try self.in_file.seekTo(opt_header_pos + 28);
            const image_base32 = try in.readIntLittle(u32);
            self.pe_header.image_base = image_base32;
        } else if (self.pe_header.magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
            num_rva_pos = opt_header_pos + 108;

            try self.in_file.seekTo(opt_header_pos + 24);
            self.pe_header.image_base = try in.readIntLittle(u64);
        } else return error.InvalidPEMagic;

        try self.in_file.seekTo(num_rva_pos);

        const number_of_rva_and_sizes = try in.readIntLittle(u32);
        if (number_of_rva_and_sizes != IMAGE_NUMBEROF_DIRECTORY_ENTRIES)
            return error.InvalidPEHeader;

        for (self.pe_header.data_directory[0..]) |*data_dir| {
            data_dir.* = OptionalHeader.DataDirectory{
                .virtual_address = try in.readIntLittle(u32),
                .size = try in.readIntLittle(u32),
            };
        }
    }

    /// Locates the CodeView (PDB70, "RSDS") debug record, stores its GUID and
    /// age in `self.guid` / `self.age`, copies the PDB path (without the
    /// terminating NUL) into `buffer` and returns the path length.
    ///
    /// Loads the section table first if it is not loaded yet.
    ///
    /// Errors: `MissingCoffSection` when neither `.buildid` nor `.rdata`
    /// exists, `InvalidPEHeader` when the debug directory lies before that
    /// section, `InvalidPEMagic` when the record is not "RSDS",
    /// `NameTooLong` when the path does not fit `buffer`, plus I/O errors.
    pub fn getPdbPath(self: *Coff, buffer: []u8) !usize {
        try self.loadSections();

        const header = blk: {
            if (self.getSection(".buildid")) |section| {
                break :blk section.header;
            } else if (self.getSection(".rdata")) |section| {
                break :blk section.header;
            } else {
                return error.MissingCoffSection;
            }
        };

        const debug_dir = &self.pe_header.data_directory[DEBUG_DIRECTORY];
        // A missing debug directory (RVA 0) or one placed before the section
        // would make the unsigned subtraction below underflow.
        if (debug_dir.virtual_address < header.virtual_address)
            return error.InvalidPEHeader;
        const file_offset = @as(u64, debug_dir.virtual_address - header.virtual_address) +
            header.pointer_to_raw_data;

        const in = self.in_file.reader();
        try self.in_file.seekTo(file_offset);

        // Find the correct DebugDirectoryEntry, and where its data is stored.
        // It can be in any section.
        // If no entry matches, the file position is simply left just past the
        // entries that were read and the signature check below decides.
        const debug_dir_entry_count = debug_dir.size / @sizeOf(DebugDirectoryEntry);
        var i: u32 = 0;
        entries: while (i < debug_dir_entry_count) : (i += 1) {
            const debug_dir_entry = try in.readStruct(DebugDirectoryEntry);
            if (debug_dir_entry.type != IMAGE_DEBUG_TYPE_CODEVIEW) continue;

            const rva = debug_dir_entry.address_of_raw_data;
            for (self.sections.items) |*section| {
                const section_start = section.header.virtual_address;
                const section_size = section.header.misc.virtual_size;
                // Check the lower bound before subtracting: these are unsigned.
                if (rva < section_start) continue;
                const offset = rva - section_start;
                if (offset < section_size and debug_dir_entry.size_of_data <= section_size - offset) {
                    try self.in_file.seekTo(@as(u64, section.header.pointer_to_raw_data) + offset);
                    break :entries;
                }
            }
        }

        var cv_signature: [4]u8 = undefined; // CodeView signature
        try in.readNoEof(cv_signature[0..]);
        // 'RSDS' indicates PDB70 format, used by lld.
        if (!mem.eql(u8, &cv_signature, "RSDS"))
            return error.InvalidPEMagic;
        try in.readNoEof(self.guid[0..]);
        self.age = try in.readIntLittle(u32);

        // Finally read the null-terminated string.
        var byte = try in.readByte();
        var len: usize = 0;
        while (byte != 0 and len < buffer.len) : (len += 1) {
            buffer[len] = byte;
            byte = try in.readByte();
        }

        if (byte != 0 and len == buffer.len)
            return error.NameTooLong;

        return len;
    }

    /// Reads `coff_header.number_of_sections` section headers from the file's
    /// current position into `self.sections`. Does nothing when that many
    /// sections are already loaded.
    ///
    /// NOTE(review): no seek is performed here, so this relies on the file
    /// still being positioned at the section table (which is where
    /// `loadHeader` appears to leave it) - confirm for other call orders.
    pub fn loadSections(self: *Coff) !void {
        if (self.sections.items.len == self.coff_header.number_of_sections)
            return;

        try self.sections.ensureTotalCapacityPrecise(self.coff_header.number_of_sections);
        // Never leave a partial list behind: a retry would otherwise append
        // past the reserved capacity via appendAssumeCapacity.
        errdefer self.sections.clearRetainingCapacity();

        const in = self.in_file.reader();

        var name: [32]u8 = undefined;

        var i: u16 = 0;
        while (i < self.coff_header.number_of_sections) : (i += 1) {
            try in.readNoEof(name[0..8]);

            if (name[0] == '/') {
                // This is a long name and stored in the string table.
                // Only bytes 1..8 belong to the on-disk field; anything after
                // that in `name` is stale or uninitialized.
                const offset_len = mem.indexOfScalar(u8, name[1..8], 0) orelse 7;

                const str_offset = try std.fmt.parseInt(u32, name[1 .. offset_len + 1], 10);
                const str = try self.readStringFromTable(str_offset, &name);
                std.mem.set(u8, name[str.len..], 0);
            } else {
                std.mem.set(u8, name[8..], 0);
            }

            // Field initializers run in source order, which is the on-disk order.
            self.sections.appendAssumeCapacity(Section{
                .header = SectionHeader{
                    .name = name,
                    .misc = SectionHeader.Misc{ .virtual_size = try in.readIntLittle(u32) },
                    .virtual_address = try in.readIntLittle(u32),
                    .size_of_raw_data = try in.readIntLittle(u32),
                    .pointer_to_raw_data = try in.readIntLittle(u32),
                    .pointer_to_relocations = try in.readIntLittle(u32),
                    .pointer_to_line_numbers = try in.readIntLittle(u32),
                    .number_of_relocations = try in.readIntLittle(u16),
                    .number_of_line_numbers = try in.readIntLittle(u16),
                    .characteristics = try in.readIntLittle(u32),
                },
            });
        }
    }

    /// Reports whether the zero-padded section name `stored` is exactly
    /// `wanted` (not merely prefixed by it).
    fn sectionNameEql(stored: *const [32]u8, wanted: []const u8) bool {
        if (wanted.len > stored.len) return false;
        if (!mem.eql(u8, stored[0..wanted.len], wanted)) return false;
        // Either the name fills the whole field or the padding starts here.
        return wanted.len == stored.len or stored[wanted.len] == 0;
    }

    /// Returns the first loaded section whose name is exactly `name`, or
    /// `null` when there is none. Only sees sections already loaded by
    /// `loadSections`.
    pub fn getSection(self: *Coff, comptime name: []const u8) ?*Section {
        for (self.sections.items) |*sec| {
            if (sectionNameEql(&sec.header.name, name)) {
                return sec;
            }
        }
        return null;
    }

    /// Returns a copy of the data of the section named `name`:
    /// `misc.virtual_size` bytes read from `pointer_to_raw_data`.
    /// The caller owns the returned slice and frees it with `allocator`.
    ///
    /// Errors: `MissingCoffSection` when no loaded section has that name,
    /// plus allocation and I/O errors.
    pub fn getSectionData(self: *Coff, comptime name: []const u8, allocator: mem.Allocator) ![]u8 {
        const sec = self.getSection(name) orelse return error.MissingCoffSection;

        const in = self.in_file.reader();
        try self.in_file.seekTo(sec.header.pointer_to_raw_data);
        const out_buff = try allocator.alloc(u8, sec.header.misc.virtual_size);
        // Do not leak the buffer when the read fails.
        errdefer allocator.free(out_buff);
        try in.readNoEof(out_buff);
        return out_buff;
    }
};

const CoffHeader = struct {
    machine: u16,
    number_of_sections: u16,
    timedate_stamp: u32,
    pointer_to_symbol_table: u32,
    number_of_symbols: u32,
    size_of_optional_header: u16,
    characteristics: u16,
};

const OptionalHeader = struct {
    const DataDirectory = struct {
        virtual_address: u32,
        size: u32,
    };

    magic: u16,
    data_directory: [IMAGE_NUMBEROF_DIRECTORY_ENTRIES]DataDirectory,
    image_base: u64,
};

// Read straight from the file with `readStruct`, so the layout must match the
// on-disk record. `extern` keeps the C layout (28 bytes, no padding for these
// naturally aligned fields); a `packed struct` may have its size rounded up
// to that of its backing integer.
const DebugDirectoryEntry = extern struct {
    characteristics: u32,
    time_date_stamp: u32,
    major_version: u16,
    minor_version: u16,
    @"type": u32,
    size_of_data: u32,
    address_of_raw_data: u32,
    pointer_to_raw_data: u32,
};

pub const Section = struct {
    header: SectionHeader,
};

const SectionHeader = struct {
    const Misc = union {
        physical_address: u32,
        virtual_size: u32,
    };

    // Zero-padded; long names are resolved through the string table.
    name: [32]u8,
    misc: Misc,
    virtual_address: u32,
    size_of_raw_data: u32,
    pointer_to_raw_data: u32,
    pointer_to_relocations: u32,
    pointer_to_line_numbers: u32,
    number_of_relocations: u16,
    number_of_line_numbers: u16,
    characteristics: u32,
};