# Copyright 2013-2017 the openage authors. See copying.md for legal info.

"""
Provides PEFile, a class for reading MS portable executable files.

Primary doc sources:
http://www.csn.ul.ie/~caolan/pub/winresdump/winresdump/doc/pefile2.html
http://en.wikibooks.org/wiki/X86_Disassembly/Windows_Executable_Files
"""

from ..util.struct import NamedStruct
from ..util.filelike.stream import StreamFragment


class PEDOSHeader(NamedStruct):
    """
    The (legacy) DOS-compatible PE header.

    In all modern PE files, only the 'lfanew' pointer is relevant
    (stored here as 'coffheaderpos').
    """

    # pylint: disable=bad-whitespace,too-few-public-methods

    endianness = "<"

    signature      = "2s"   # always 'MZ'
    bytes_lastpage = "H"    # bytes on the last page of file
    count_pages    = "H"    # pages in file
    crlc           = "H"    # relocations
    cparhdr        = "H"    # size of header in paragraphs
    minalloc       = "H"    # minimum extra paragraphs needed
    maxalloc       = "H"    # maximum extra paragraphs needed
    initial_ss     = "H"    # initial (relative) SS value
    initial_sp     = "H"    # initial sp value
    checksum       = "H"    # checksum
    initial_ip     = "H"    # initial IP value
    initial_cs     = "H"    # initial (relative) CS value
    lfarlc         = "H"    # file address of relocation table
    ovno           = "H"    # overlay number
    reserved0      = "8s"   # reserved block #0
    oemid          = "H"    # OEM identifier (for oeminfo)
    oeminfo        = "H"    # OEM information; oemid-specific
    reserved1      = "20s"  # reserved block #1
    coffheaderpos  = "I"    # address of new EXE header


class PECOFFHeader(NamedStruct):
    """
    The new (win32) PE and object file header.

    The fields below add up to 24 bytes (4-byte signature plus the
    20-byte COFF file header).
    """

    # pylint: disable=bad-whitespace,too-few-public-methods

    endianness = "<"

    signature          = "4s"  # always 'PE\0\0'
    machine            = "H"   # architecture; 332 means x86
    number_of_sections = "H"
    time_stamp         = "I"
    symbol_table_ptr   = "I"
    symbol_count       = "I"
    opt_header_size    = "H"
    characteristics    = "H"   # 2: exe; 512: non-relocatable; 8192: dll


class PEOptionalHeader(NamedStruct):
    """
    This "optional" header is required for linked files (but not object files).

    The fixed-size fields below add up to 96 bytes; the data directory
    entries that follow them are read separately by PEFile.
    """

    # pylint: disable=bad-whitespace,too-few-public-methods

    endianness = "<"

    signature           = "H"  # 267: x86; 523: x86_64
    major_linker_ver    = "B"
    minor_linker_ver    = "B"
    size_of_code        = "I"
    size_of_data        = "I"
    size_of_bss         = "I"
    entry_point_addr    = "I"  # RVA of code entry point
    base_of_code        = "I"
    base_of_data        = "I"
    image_base          = "I"  # preferred memory location
    section_alignment   = "I"
    file_alignment      = "I"
    major_os_ver        = "H"
    minor_os_ver        = "H"
    major_img_ver       = "H"
    minor_img_ver       = "H"
    major_subsys_ver    = "H"
    minor_subsys_ver    = "H"
    reserved            = "I"
    size_of_image       = "I"
    size_of_headers     = "I"
    checksum            = "I"

    # the windows subsystem to run this executable.
    # 1: native, 2: GUI, 3: non-GUI, 5: OS/2, 7: POSIX
    subsystem           = "H"

    dll_characteristics = "H"  # some flags we're not interested in.
    stack_reserve_size  = "I"
    stack_commit_size   = "I"
    heap_reserve_size   = "I"
    heap_commit_size    = "I"
    loader_flags        = "I"  # we're not interested in those either.

    # describes the number of data directory headers that follow this header.
    # always 16.
    data_directory_count = "I"

    # written manually at some later point
    # (PEFile.__init__ stores a list of PEDataDirectory objects here)
    data_directories = None


class PEDataDirectory(NamedStruct):
    """
    Provides the locations of various metadata structures,
    which are used to set up the execution environment.
    """

    # pylint: disable=bad-whitespace,too-few-public-methods

    endianness = "<"

    rva  = "I"
    size = "I"


class PESection(NamedStruct):
    """
    Describes a section in a PE file (like an ELF section).
    """

    # pylint: disable=bad-whitespace,too-few-public-methods

    endianness = "<"

    name            = "8s"   # first char must be '.'.
    virtual_size    = "I"    # size in memory
    virtual_address = "I"    # RVA where the section will be loaded.
    size_on_disk    = "I"
    file_offset     = "I"
    reserved        = "12s"
    flags           = "I"    # some flags we don't care about


class PEFile:
    """
    Reads Microsoft PE files.

    The constructor takes a file-like object.

    After construction, the parsed headers are available as the attributes
    'doshdr', 'coffhdr' and 'opthdr', and 'sections' maps each section name
    (str, e.g. '.rsrc') to its PESection header.
    """

    # size in bytes of the structure described by PECOFFHeader
    # (4s + H + H + I + I + I + H + H).
    _COFF_HEADER_SIZE = 24

    # the only optional header size this reader understands:
    # 96 bytes of fixed fields + 16 data directories of 8 bytes each.
    _OPT_HEADER_SIZE = 224

    # number of data directory slots that fit in an optional header
    # of _OPT_HEADER_SIZE bytes.
    _MAX_DATA_DIRECTORIES = 16

    def __init__(self, fileobj):
        """
        Parses all PE headers from fileobj.

        fileobj must support seek() and must be readable by
        NamedStruct.read(); it is kept (not copied) for later section reads.

        Raises Exception if a signature does not match, if the optional
        header has an unsupported size, if more data directories are
        announced than fit into the optional header, or if a section name
        does not start with '.'.
        """
        # read DOS header
        doshdr = PEDOSHeader.read(fileobj)
        if doshdr.signature != b'MZ':
            raise Exception("not a PE file")

        # read COFF header
        fileobj.seek(doshdr.coffheaderpos)
        coffhdr = PECOFFHeader.read(fileobj)

        if coffhdr.signature != b'PE\0\0':
            raise Exception("not a Win32 PE file")

        if coffhdr.opt_header_size != self._OPT_HEADER_SIZE:
            raise Exception("unknown optional header size")

        # read optional header
        opthdr = PEOptionalHeader.read(fileobj)

        # NOTE(review): 523 (PE32+) is accepted here, but PEOptionalHeader
        # describes the 32-bit layout and the size check above only admits
        # 224-byte headers -- confirm whether 523 is meant to be supported.
        if opthdr.signature not in {267, 523}:
            raise Exception("Not an x86{_64} file")

        # the count comes straight from the file; without a bound, a corrupt
        # value would make us read section headers (or far more) as
        # data directories.
        if opthdr.data_directory_count > self._MAX_DATA_DIRECTORIES:
            raise Exception(
                "too many data directories: " +
                str(opthdr.data_directory_count))

        # read data directories
        opthdr.data_directories = [
            PEDataDirectory.read(fileobj)
            for _ in range(opthdr.data_directory_count)
        ]

        # the section table starts right after the optional header, whose
        # size is given by the COFF header. seek there explicitly instead of
        # relying on exactly 16 data directories having been read
        # (for a count of 16 this does not move the file position).
        fileobj.seek(
            doshdr.coffheaderpos +
            self._COFF_HEADER_SIZE +
            coffhdr.opt_header_size)

        # read section headers
        sections = {}

        for _ in range(coffhdr.number_of_sections):
            section = PESection.read(fileobj)

            # names are NUL-padded to 8 bytes on disk.
            section.name = section.name.decode('ascii').rstrip('\0')
            if not section.name.startswith('.'):
                raise Exception("Invalid section name: " + section.name)

            sections[section.name] = section

        # store all read header info
        self.fileobj = fileobj

        self.doshdr = doshdr
        self.coffhdr = coffhdr
        self.opthdr = opthdr

        self.sections = sections

    def open_section(self, section_name):
        """
        Returns a tuple of data, va for the given section.

        data is a file-like object (StreamFragment) that starts at the
        section's file offset and spans its virtual size,
        and va is the RVA of the section start.

        Raises Exception if the file has no section of that name.
        """
        try:
            section = self.sections[section_name]
        except KeyError:
            raise Exception(
                "no such section in PE file: " + section_name) from None

        data = StreamFragment(
            self.fileobj,
            section.file_offset,
            section.virtual_size)

        return data, section.virtual_address

    def resources(self):
        """
        Returns a PEResources object for self.
        """
        # imported at call time rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from .peresource import PEResources
        return PEResources(self)