import os
import struct
import elftools
import logging
from collections import defaultdict

from .elf import ELF
from ..blob import Blob
from .. import register_backend
from ...errors import CLEError, CLECompatibilityError
from ...memory import Clemory
from ...address_translator import AT

l = logging.getLogger(name=__name__)

# TODO: consider a declarative parser (or bitstream) for the note payloads instead of hand-computed offsets


class ELFCore(ELF):
    """
    Loader class for ELF core files.

    On construction the note segments are parsed (per-thread register state, the NT_FILE mapping table and the
    auxiliary vector) and the objects that were mapped into the dumped process are then loaded as child objects.

    :param executable:  Optional path to the main executable of the dumped process. When given, the NT_FILE entry
                        whose basename matches it (or the first entry if none matches) is redirected to this path.
    """
    is_default = True  # Tell CLE to automatically consider using the ELFCore backend

    def __init__(self, *args, executable=None, **kwargs):
        super().__init__(*args, **kwargs)

        # list of (vm_start, vm_end, file_offset, filename) tuples, filled in from the NT_FILE note
        self.filename_lookup = []
        # thread record currently being accumulated while walking the notes
        self.__current_thread = None
        # completed thread records, in the order their NT_PRSTATUS notes were seen
        self._threads = []
        # auxiliary vector: code name (or raw int for unknown codes) -> value
        self.auxv = {}
        self._main_filepath = executable

        self.__extract_note_info()

        self.__reload_children()

    @staticmethod
    def is_compatible(stream):
        """
        Tell whether `stream` holds an ELF file whose header type is ET_CORE.

        The stream is rewound before the magic check and again before it is handed to pyelftools.

        :param stream:  A seekable binary stream.
        :return:        True if the stream is an ELF core file, False otherwise.
        """
        stream.seek(0)
        magic = stream.read(4)
        stream.seek(0)
        if not magic.startswith(b'\x7fELF'):
            return False
        return elftools.elf.elffile.ELFFile(stream).header['e_type'] == 'ET_CORE'

    def __cycle_thread(self):
        """
        Commit the thread record being accumulated (if any) to `_threads` and start a fresh one.
        """
        if self.__current_thread is not None:
            self._threads.append(self.__current_thread)
        self.__current_thread = {}

    @property
    def threads(self):
        """
        The list of valid thread indices, usable as the `thread` argument of :meth:`thread_registers`.
        """
        return list(range(len(self._threads)))

    def thread_registers(self, thread=None):
        """
        Return the register dict (name -> value) recorded for a thread.

        :param thread:  Index into the thread list; defaults to the first thread.
        """
        if thread is None:
            thread = 0
        return self._threads[thread]['registers']

    def __extract_note_info(self):
        """
        All meaningful information about the process's state at crashtime is stored in the note segment.

        Each NT_PRSTATUS note starts a new thread record; the other handled notes add to the current thread
        (x86 TLS descriptors) or to the object as a whole (NT_FILE, NT_AUXV).
        """
        for seg_readelf in self._reader.iter_segments():
            if seg_readelf.header.p_type == 'PT_NOTE':
                for note in seg_readelf.iter_notes():
                    if note.n_type == 'NT_PRSTATUS':
                        self.__cycle_thread()
                        # NOTE(review): presumably the reader hands the descriptor back as a str, and latin-1
                        # maps code points 0-255 straight back to the raw bytes - confirm
                        self.__parse_prstatus(note.n_desc.encode('latin-1'))  # ???
                    elif note.n_type == 'NT_FILE':
                        self.__parse_files(note.n_desc)
                    elif note.n_type == 'NT_AUXV':
                        self.__parse_auxv(note.n_desc.encode('latin-1'))
                    elif note.n_type == 512 and self.arch.name == 'X86':
                        # note type 512 is reported as a raw integer; its payload is parsed as x86 TLS descriptors
                        self.__parse_x86_tls(note.n_desc.encode('latin-1'))

        # flush the last thread record
        self.__cycle_thread()
        if not self._threads:
            l.warning("Could not find thread info, cannot initialize registers")
        elif self.arch.name == 'X86' and 'segments' not in self._threads[0]:
            if 'AT_RANDOM' in self.auxv:
                l.warning("This core dump does not contain TLS information. threads will be matched to TLS regions via heuristics")
                pointer_rand = self.auxv['AT_RANDOM'][4:8]
                # the property builds a new Clemory on every access, so fetch it once for the whole scan
                mem = self.__dummy_clemory
                # keep candidates where the word 0x18 bytes before the match points at itself
                all_locations = [
                    addr - 0x18
                    for addr in mem.find(pointer_rand)
                    if mem.unpack_word(addr - 0x18) == addr - 0x18
                ]
                # the heuristic is that generally threads are allocated with descending tls addresses
                for thread, loc in zip(self._threads, reversed(all_locations)):
                    thread['segments'] = {thread['registers']['gs'] >> 3: (loc, 0xfffff, 0x51)}
            else:
                l.warning("This core dump does not contain TLS or auxv information. TLS information will be wrong.")
                for thread in self._threads:
                    thread['segments'] = {thread['registers']['gs'] >> 3: (0, 0xffffffff, 0x51)}

    @property
    def __dummy_clemory(self):
        """
        A fresh root Clemory wrapping this object's memory at its linked base.

        A new Clemory is constructed on every access, so callers that need it repeatedly should bind it to a local.
        """
        dummy_clemory = Clemory(self.arch, root=True)
        dummy_clemory.add_backer(self.linked_base, self.memory)
        return dummy_clemory

    def __parse_prstatus(self, desc):
        """
        Parse out the prstatus, accumulating the general purpose register values. Supports AMD64, X86, ARM, AARCH64
        and MIPS32 at the moment.

        The parsed fields (signal info, pids, the four process times in microseconds, a ``registers`` dict and
        ``pr_fpvalid``) are merged into the thread record currently being accumulated.

        :param desc:    The raw bytes of the descriptor of an NT_PRSTATUS note. All fields are decoded as
                        little-endian.
        :raises CLEError:               If the architecture's word size is neither 4 nor 8 bytes.
        :raises CLECompatibilityError:  If the register layout of the architecture is not known.
        """

        # TODO: support all architectures angr supports

        arch_bytes = self.arch.bytes
        if arch_bytes == 4:
            fmt = "I"
        elif arch_bytes == 8:
            fmt = "Q"
        else:
            raise CLEError("Architecture must have a bitwidth of either 64 or 32")

        result = {}
        # pr_cursig is a short, but it's padded to an int, so it is read as the fourth 32-bit field
        (result['si_signo'], result['si_code'], result['si_errno'],
         result['pr_cursig']) = struct.unpack_from("<4I", desc, 0)
        pos = 16

        result['pr_sigpend'], result['pr_sighold'] = struct.unpack_from("<" + fmt * 2, desc, pos)
        pos += 2 * arch_bytes

        (result['pr_pid'], result['pr_ppid'], result['pr_pgrp'],
         result['pr_sid']) = struct.unpack_from("<4I", desc, pos)
        pos += 4 * 4

        # parse out the 4 timevals; each is a (seconds, microseconds) pair of machine words which is folded
        # into a single microsecond count
        for field in ('pr_utime_usec', 'pr_stime_usec', 'pr_cutime_usec', 'pr_cstime_usec'):
            tv_sec, tv_usec = struct.unpack_from("<" + fmt * 2, desc, pos)
            result[field] = tv_sec * 1000000 + tv_usec
            pos += arch_bytes * 2

        # parse out general purpose registers
        # register names as they appear in dump; 'xxx' marks slots that are not exposed
        arch_name = self.arch.name
        if arch_name == 'AMD64':
            rnames = ['r15', 'r14', 'r13', 'r12', 'rbp', 'rbx', 'r11', 'r10', 'r9', 'r8', 'rax', 'rcx',
                      'rdx', 'rsi', 'rdi', 'xxx', 'rip', 'cs', 'eflags', 'rsp', 'ss', 'fs_base', 'gs_base', 'ds', 'es',
                      'xxx', 'xxx']
        elif arch_name == 'X86':
            rnames = ['ebx', 'ecx', 'edx', 'esi', 'edi', 'ebp', 'eax', 'ds', 'es', 'fs', 'gs', 'xxx', 'eip',
                      'cs', 'eflags', 'esp', 'ss']
        elif arch_name in ('ARMHF', 'ARMEL'):
            rnames = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13',
                      'r14', 'r15', 'xxx', 'xxx']
        elif arch_name == 'AARCH64':
            rnames = ['x%d' % i for i in range(32)]
            rnames.append('pc')
            rnames.append('xxx')
        elif arch_name == 'MIPS32':
            rnames = ['xxx', 'xxx', 'xxx', 'xxx', 'xxx', 'xxx',
                      'zero', 'at', 'v0', 'v1', 'a0', 'a1', 'a2', 'a3',
                      't0', 't1', 't2', 't3', 't4', 't5', 't6', 't7',
                      's0', 's1', 's2', 's3', 's4', 's5', 's6', 's7',
                      't8', 't9', 'k0', 'k1', 'gp', 'sp', 's8', 'ra',
                      'lo', 'hi', 'pc', 'bad', 'sr', 'status', 'cause']
        else:
            raise CLECompatibilityError("Architecture '%s' unsupported by ELFCore" % self.arch.name)

        nreg = len(rnames)
        regvals = struct.unpack_from("<%d%s" % (nreg, fmt), desc, pos)
        result['registers'] = dict(zip(rnames, regvals))
        result['registers'].pop('xxx', None)

        pos += nreg * arch_bytes
        result['pr_fpvalid'] = struct.unpack_from("<I", desc, pos)[0]
        self.__current_thread.update(result)

    def __parse_files(self, desc):
        """
        Record the NT_FILE mapping table as (vm_start, vm_end, file_offset, filename) tuples in `filename_lookup`.

        If a main executable path was supplied, every entry belonging to the main binary is rewritten to use that
        path. The main binary is the first entry whose unix basename equals the basename of the supplied path, or
        the first entry of the table if none does.

        :param desc:    The parsed NT_FILE descriptor, providing `Elf_Nt_File_Entry`, `filename` and `page_size`.
        """
        self.filename_lookup = [
            (ent.vm_start, ent.vm_end, ent.page_offset * desc.page_size, fn.decode())
            for ent, fn in zip(desc.Elf_Nt_File_Entry, desc.filename)
        ]

        # TODO this can be less stupid if we just parse out what the name/address of the main executable is
        # that metadata has to be somewhere, right?
        if not self.filename_lookup or self._main_filepath is None:
            return

        wanted = os.path.basename(self._main_filepath)
        for _, _, _, fn in self.filename_lookup:
            if wanted == fn[fn.rfind('/')+1:]:  # explicit unix basename
                matched = fn
                break
        else:
            matched = self.filename_lookup[0][-1]

        for i, (a, b, c, fn) in enumerate(self.filename_lookup):
            if fn == matched:
                self.filename_lookup[i] = (a, b, c, self._main_filepath)

    def __parse_x86_tls(self, desc):
        """
        Parse a sequence of 16-byte (index, base, limit, flags) records into the current thread's ``segments``
        dict, keyed by index.

        :param desc:    The raw bytes of the note descriptor.
        """
        self.__current_thread['segments'] = {}
        for offset in range(0, len(desc), 4*4):
            index, base, limit, flags = struct.unpack_from('4I', desc, offset)
            self.__current_thread['segments'][index] = (base, limit, flags)

    def __parse_auxv(self, desc):
        """
        Parse the auxiliary vector into `auxv`.

        Each entry is a (code, value) pair of machine words. Known codes are stored under their AT_* name, unknown
        ones under the raw integer. For AT_RANDOM the 16 bytes the value points to are stored instead of the
        pointer; for AT_EXECFN and AT_PLATFORM the NUL-terminated byte string it points to is stored.

        :param desc:    The raw bytes of the NT_AUXV note descriptor.
        """
        word_fmt = self.arch.struct_fmt()
        word_size = self.arch.bytes
        for offset in range(0, len(desc), word_size * 2):
            code = struct.unpack_from(word_fmt, desc, offset)[0]
            value = struct.unpack_from(word_fmt, desc, offset + word_size)[0]
            code_str = auxv_codes.get(code, code)

            if code_str == 'AT_RANDOM':
                value = self.__dummy_clemory.load(value, 0x10)
            elif code_str in ('AT_EXECFN', 'AT_PLATFORM'):
                # the property builds a new Clemory on every access; fetch it once rather than once per byte
                mem = self.__dummy_clemory
                pos = value
                chars = bytearray()
                while True:
                    byte = mem[pos]
                    if byte == 0:
                        break
                    chars.append(byte)
                    pos += 1
                value = bytes(chars)

            self.auxv[code_str] = value

    def __reload_children(self):
        """
        Load every file named in the NT_FILE table as a child object, patch it with the contents captured in the
        core, and turn whatever core segments are left over into Blob children.

        If any file cannot be opened, or its base address cannot be determined, a warning is logged, all children
        loaded so far are discarded and the method returns early.
        """
        # god damn. hacks start here
        self.loader.page_size = 0x1000
        self.loader._perform_relocations = False

        # hack: we are using a loader internal method in a non-kosher way which will cause our children to be
        # marked as the main binary if we are also the main binary
        # work around this by setting ourself here:
        if self.loader.main_object is None:
            self.loader.main_object = self

        # the property builds a new Clemory on every access; one instance serves this whole method
        core_mem = self.__dummy_clemory

        # filename -> list of (vm_start, file_offset, data) for each mapping whose contents are in the core
        child_patches = defaultdict(list)
        for vm_start, vm_end, offset, filename in self.filename_lookup:
            try:
                patch_data = core_mem.load(vm_start, vm_end-vm_start)
            except KeyError:
                # the mapping's contents are not in the core; nothing to patch
                pass
            else:
                child_patches[filename].append((vm_start, offset, patch_data))

        remaining_segments = list(self.segments)

        for filename, patches in child_patches.items():
            try:
                with open(filename, 'rb') as fp:
                    obj = self.loader._load_object_isolated(fp)
            except FileNotFoundError:
                l.warning("Could not load %s; core may be incomplete", filename)
                if self.loader.main_object is self:
                    self.loader.main_object = None
                self.child_objects.clear()
                return

            # several ways to try to match the NT_FILE entries to the object
            # (not trivial because offsets can be mapped multiple places)
            # (and because there's no clear pattern for how mappings are included or omitted)
            base_addr = None

            # try one: use the delta between each allocation as a signature (works when the text segment is missing)
            if base_addr is None:
                vm_starts = [a for a, _, _ in patches]
                vm_deltas = [b - a for a, b in zip(vm_starts, vm_starts[1:])]
                segment_starts = [seg.vaddr for seg in obj.segments]
                segment_deltas = [b - a for a, b in zip(segment_starts, segment_starts[1:])]

                # funky lil algorithm to find substrings
                for match_idx in range(len(segment_deltas) - len(vm_deltas) + 1):
                    for idx, vm_delta in enumerate(vm_deltas):
                        if vm_delta != segment_deltas[match_idx + idx]:
                            break
                    else:
                        base_addr = vm_starts[0] - AT.from_lva(obj.segments[match_idx].vaddr, obj).to_rva()
                        break

            # try two: if the file is identity-mapped, it's easy (?)
            if base_addr is None:
                base_reccomendations = [a - b for a, b, _ in patches]
                if all(a == base_reccomendations[0] for a in base_reccomendations):
                    base_addr = base_reccomendations[0]

            # try three: if we have the zero offset then it's easy (?)
            if base_addr is None:
                if patches[0][1] == 0:
                    base_addr = patches[0][0]

            if base_addr is None:
                l.warning("Could not load %s (could not determine base); core may be incomplete", filename)
                if self.loader.main_object is self:
                    self.loader.main_object = None
                self.child_objects.clear()
                return

            # store data provided by core into object
            for vaddr, _, patch in patches:
                try:
                    obj.memory.store(vaddr - base_addr, patch)
                except KeyError:
                    pass  # this case handled below in the inject clause, right???

            obj._custom_base_addr = base_addr
            self.child_objects.append(obj)

            # remove any core segments which are handled by this object
            for seg in obj.segments:
                addr = AT.from_lva(seg.vaddr, obj).to_rva() + base_addr
                # probe the object's segment in 0x1000-byte steps
                for subaddr in range(addr, addr + seg.memsize, 0x1000):
                    match_seg = self.find_segment_containing(subaddr)
                    if match_seg is not None:
                        try:
                            remaining_segments.remove(match_seg)
                        except ValueError:
                            # already removed by an earlier probe
                            pass

            # inject any core segments which are not handled by the object but overlap with it
            max_addr = base_addr + (obj.max_addr - obj.min_addr)
            i = 0
            while i < len(remaining_segments):
                seg = remaining_segments[i]
                if base_addr <= seg.vaddr <= max_addr or seg.vaddr <= base_addr < seg.vaddr + seg.memsize:
                    # popping shifts the next candidate into slot i, so the index is not advanced here
                    remaining_segments.pop(i)

                    seg_vaddr, backer = next(self.memory.backers(AT.from_mva(seg.vaddr, self).to_rva()))
                    assert seg_vaddr == AT.from_mva(seg.vaddr, self).to_rva()
                    obj.memory.add_backer(seg.vaddr - base_addr, backer)
                else:
                    i += 1

        # for all remaining segments, make blobs out of them
        for seg in remaining_segments:
            if not seg.memsize:
                continue
            obj = Blob(self.binary, core_mem, segments=[(seg.vaddr, seg.vaddr, seg.memsize)], base_addr=seg.vaddr,
                       arch=self.arch, entry_point=0, force_rebase=True)
            self.child_objects.append(obj)

        # the core object itself no longer occupies any address space; its children hold the memory
        self.mapped_base = 0
        self._max_addr = 0
        self.has_memory = False
        # undo the main_object hack from the top of this method
        if self.loader.main_object is self:
            self.loader.main_object = None


# auxiliary vector entry codes -> symbolic names
auxv_codes = {
    0x0: 'AT_NULL',
    0x1: 'AT_IGNORE',
    0x2: 'AT_EXECFD',
    0x3: 'AT_PHDR',
    0x4: 'AT_PHENT',
    0x5: 'AT_PHNUM',
    0x6: 'AT_PAGESZ',
    0x7: 'AT_BASE',
    0x8: 'AT_FLAGS',
    0x9: 'AT_ENTRY',
    0xa: 'AT_NOTELF',
    0xb: 'AT_UID',
    0xc: 'AT_EUID',
    0xd: 'AT_GID',
    0xe: 'AT_EGID',
    0x11: 'AT_CLKTCK',
    0xf: 'AT_PLATFORM',
    0x10: 'AT_HWCAP',
    0x12: 'AT_FPUCW',
    0x13: 'AT_DCACHEBSIZE',
    0x14: 'AT_ICACHEBSIZE',
    0x15: 'AT_UCACHEBSIZE',
    0x16: 'AT_IGNOREPPC',
    0x17: 'AT_SECURE',
    0x18: 'AT_BASE_PLATFORM',
    0x19: 'AT_RANDOM',
    0x1a: 'AT_HWCAP2',
    0x1f: 'AT_EXECFN',
    0x20: 'AT_SYSINFO',
    0x21: 'AT_SYSINFO_EHDR',
    0x22: 'AT_L1I_CACHESHAPE',
    0x23: 'AT_L1D_CACHESHAPE',
    0x24: 'AT_L2_CACHESHAPE',
    0x25: 'AT_L3_CACHESHAPE',
    0x28: 'AT_L1I_CACHESIZE',
    0x29: 'AT_L1I_CACHEGEOMETRY',
    0x2a: 'AT_L1D_CACHESIZE',
    0x2b: 'AT_L1D_CACHEGEOMETRY',
    0x2c: 'AT_L2_CACHESIZE',
    0x2d: 'AT_L2_CACHEGEOMETRY',
    0x2e: 'AT_L3_CACHESIZE',
    0x2f: 'AT_L3_CACHEGEOMETRY'}

register_backend('elfcore', ELFCore)