1import os
2import struct
3import elftools
4import logging
5from collections import defaultdict
6
7from .elf import ELF
8from ..blob import Blob
9from .. import register_backend
10from ...errors import CLEError, CLECompatibilityError
11from ...memory import Clemory
12from ...address_translator import AT
13
14l = logging.getLogger(name=__name__)
15
# TODO: parse the note descriptors with struct.unpack_from (or a bitstream-style reader) instead of slicing by hand
17
18
class ELFCore(ELF):
    """
    Loader class for ELF core files.

    On construction the PT_NOTE segments are parsed for per-thread state (NT_PRSTATUS), the list of mapped files
    (NT_FILE) and the auxiliary vector (NT_AUXV). The files named by the NT_FILE note are then loaded as child objects
    and patched with the memory stored in the core; core segments not covered by any file become blobs.

    :ivar filename_lookup:  List of ``(vm_start, vm_end, file_offset, filename)`` tuples built from the NT_FILE note.
    :ivar auxv:             Auxiliary vector entries keyed by their ``AT_*`` name, or by the raw code if it is unknown.
    """
    is_default = True # Tell CLE to automatically consider using the ELFCore backend
24
25    def __init__(self, *args, executable=None, **kwargs):
26        super().__init__(*args, **kwargs)
27
28        self.filename_lookup = []
29        self.__current_thread = None
30        self._threads = []
31        self.auxv = {}
32        self._main_filepath = executable
33
34        self.__extract_note_info()
35
36        self.__reload_children()
37
38    @staticmethod
39    def is_compatible(stream):
40        stream.seek(0)
41        identstring = stream.read(0x1000)
42        stream.seek(0)
43        if identstring.startswith(b'\x7fELF'):
44            if elftools.elf.elffile.ELFFile(stream).header['e_type'] == 'ET_CORE':
45                return True
46            return False
47        return False
48
49    def __cycle_thread(self):
50        if self.__current_thread is not None:
51            self._threads.append(self.__current_thread)
52        self.__current_thread = {}
53
54    @property
55    def threads(self):
56        return list(range(len(self._threads)))
57
58    def thread_registers(self, thread=None):
59        if thread is None:
60            thread = 0
61        return self._threads[thread]['registers']
62
63    def __extract_note_info(self):
64        """
65        All meaningful information about the process's state at crashtime is stored in the note segment.
66        """
67        for seg_readelf in self._reader.iter_segments():
68            if seg_readelf.header.p_type == 'PT_NOTE':
69                for note in seg_readelf.iter_notes():
70                    if note.n_type == 'NT_PRSTATUS':
71                        self.__cycle_thread()
72                        self.__parse_prstatus(note.n_desc.encode('latin-1'))  # ???
73                    elif note.n_type == 'NT_FILE':
74                        self.__parse_files(note.n_desc)
75                    elif note.n_type == 'NT_AUXV':
76                        self.__parse_auxv(note.n_desc.encode('latin-1'))
77                    elif note.n_type == 512 and self.arch.name == 'X86':
78                        self.__parse_x86_tls(note.n_desc.encode('latin-1'))
79
80        self.__cycle_thread()
81        if not self._threads:
82            l.warning("Could not find thread info, cannot initialize registers")
83        elif self.arch.name == 'X86' and 'segments' not in self._threads[0]:
84            if 'AT_RANDOM' in self.auxv:
85                l.warning("This core dump does not contain TLS information. threads will be matched to TLS regions via heuristics")
86                pointer_rand = self.auxv['AT_RANDOM'][4:8]
87                all_locations = [addr - 0x18 for addr in self.__dummy_clemory.find(pointer_rand) if self.__dummy_clemory.unpack_word(addr - 0x18) == addr - 0x18]
88                # the heuristic is that generally threads are allocated with descending tls addresses
89                for thread, loc in zip(self._threads, reversed(all_locations)):
90                    thread['segments'] = {thread['registers']['gs'] >> 3: (loc, 0xfffff, 0x51)}
91            else:
92                l.warning("This core dump does not contain TLS or auxv information. TLS information will be wrong.")
93                for thread in self._threads:
94                    thread['segments'] = {thread['registers']['gs'] >> 3: (0, 0xffffffff, 0x51)}
95
96    @property
97    def __dummy_clemory(self):
98        dummy_clemory = Clemory(self.arch, root=True)
99        dummy_clemory.add_backer(self.linked_base, self.memory)
100        return dummy_clemory
101
102
103    def __parse_prstatus(self, desc):
104        """
105        Parse out the prstatus, accumulating the general purpose register values. Supports AMD64, X86, ARM, and AARCH64
106        at the moment.
107
108        :param prstatus: a note object of type NT_PRSTATUS.
109        """
110
111        # TODO: support all architectures angr supports
112
113        result = {}
114        result['si_signo'], result['si_code'], result['si_errno'] = struct.unpack("<3I", desc[:12])
115
116        # this field is a short, but it's padded to an int
117        result['pr_cursig'] = struct.unpack("<I", desc[12:16])[0]
118
119        arch_bytes = self.arch.bytes
120        if arch_bytes == 4:
121            fmt = "I"
122        elif arch_bytes == 8:
123            fmt = "Q"
124        else:
125            raise CLEError("Architecture must have a bitwidth of either 64 or 32")
126
127        result['pr_sigpend'], result['pr_sighold'] = struct.unpack("<" + (fmt * 2), desc[16:16+(2*arch_bytes)])
128
129        attrs = struct.unpack("<IIII", desc[16+(2*arch_bytes):16+(2*arch_bytes)+(4*4)])
130        result['pr_pid'], result['pr_ppid'], result['pr_pgrp'], result['pr_sid'] = attrs
131
132        # parse out the 4 timevals
133        pos = 16+(2*arch_bytes)+(4*4)
134        usec = struct.unpack("<" + fmt, desc[pos:pos+arch_bytes])[0] * 1000
135        result['pr_utime_usec'] = struct.unpack("<" + fmt, desc[pos+arch_bytes:pos+arch_bytes*2])[0] + usec
136
137        pos += arch_bytes * 2
138        usec = struct.unpack("<" + fmt, desc[pos:pos+arch_bytes])[0] * 1000
139        result['pr_stime_usec'] = struct.unpack("<" + fmt, desc[pos+arch_bytes:pos+arch_bytes*2])[0] + usec
140
141        pos += arch_bytes * 2
142        usec = struct.unpack("<" + fmt, desc[pos:pos+arch_bytes])[0] * 1000
143        result['pr_cutime_usec'] = struct.unpack("<" + fmt, desc[pos+arch_bytes:pos+arch_bytes*2])[0] + usec
144
145        pos += arch_bytes * 2
146        usec = struct.unpack("<" + fmt, desc[pos:pos+arch_bytes])[0] * 1000
147        result['pr_cstime_usec'] = struct.unpack("<" + fmt, desc[pos+arch_bytes:pos+arch_bytes*2])[0] + usec
148
149        pos += arch_bytes * 2
150
151        # parse out general purpose registers
152        if self.arch.name == 'AMD64':
153            # register names as they appear in dump
154            rnames = ['r15', 'r14', 'r13', 'r12', 'rbp', 'rbx', 'r11', 'r10', 'r9', 'r8', 'rax', 'rcx',
155                    'rdx', 'rsi', 'rdi', 'xxx', 'rip', 'cs', 'eflags', 'rsp', 'ss', 'fs_base', 'gs_base', 'ds', 'es',
156                    'xxx', 'xxx']
157            nreg = 27
158        elif self.arch.name == 'X86':
159            rnames = ['ebx', 'ecx', 'edx', 'esi', 'edi', 'ebp', 'eax', 'ds', 'es', 'fs', 'gs', 'xxx', 'eip',
160                    'cs', 'eflags', 'esp', 'ss']
161            nreg = 17
162        elif self.arch.name == 'ARMHF' or self.arch.name == 'ARMEL':
163            rnames = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13',
164                    'r14', 'r15', 'xxx', 'xxx']
165            nreg = 18
166        elif self.arch.name == 'AARCH64':
167            rnames =  ['x%d' % i for i in range(32)]
168            rnames.append('pc')
169            rnames.append('xxx')
170            nreg = 34
171        elif self.arch.name == 'MIPS32':
172            rnames = ['xxx', 'xxx', 'xxx', 'xxx', 'xxx', 'xxx',
173                    'zero', 'at', 'v0', 'v1', 'a0', 'a1', 'a2', 'a3',
174                    't0', 't1', 't2', 't3', 't4', 't5', 't6', 't7',
175                    's0', 's1', 's2', 's3', 's4', 's5', 's6', 's7',
176                    't8', 't9', 'k0', 'k1', 'gp', 'sp', 's8', 'ra',
177                    'lo', 'hi', 'pc', 'bad', 'sr', 'status', 'cause']
178            nreg = 45
179        else:
180            raise CLECompatibilityError("Architecture '%s' unsupported by ELFCore" % self.arch.name)
181
182        regvals = []
183        for idx in range(pos, pos+nreg*arch_bytes, arch_bytes):
184            regvals.append(struct.unpack("<" + fmt, desc[idx:idx+arch_bytes])[0])
185        result['registers'] = dict(zip(rnames, regvals))
186        del result['registers']['xxx']
187
188        pos += nreg * arch_bytes
189        result['pr_fpvalid'] = struct.unpack("<I", desc[pos:pos+4])[0]
190        self.__current_thread.update(result)
191
192    def __parse_files(self, desc):
193        self.filename_lookup = [(ent.vm_start, ent.vm_end, ent.page_offset * desc.page_size, fn.decode()) for ent, fn in zip(desc.Elf_Nt_File_Entry, desc.filename)]
194
195        # TODO this can be less stupid if we just parse out what the name/address of the main executable is
196        # that metadata has to be somewhere, right?
197        matched = None
198        if self.filename_lookup and self._main_filepath is not None:
199            for i, (a, b, c, fn) in enumerate(self.filename_lookup):
200                if os.path.basename(self._main_filepath) == fn[fn.rfind('/')+1:]: # explicit unix basename
201                    matched = fn
202                    break
203            else:
204                matched = self.filename_lookup[0][-1]
205
206        for i, (a, b, c, fn) in enumerate(self.filename_lookup):
207            if fn == matched:
208                self.filename_lookup[i] = (a, b, c, self._main_filepath)
209
210
211    def __parse_x86_tls(self, desc):
212        self.__current_thread['segments'] = {}
213        for offset in range(0, len(desc), 4*4):
214            index, base, limit, flags = struct.unpack_from('4I', desc, offset)
215            self.__current_thread['segments'][index] = (base, limit, flags)
216
217    def __parse_auxv(self, desc):
218        for offset in range(0, len(desc), self.arch.bytes*2):
219            code = struct.unpack_from(self.arch.struct_fmt(), desc, offset)[0]
220            value = struct.unpack_from(self.arch.struct_fmt(), desc, offset + self.arch.bytes)[0]
221            code_str = auxv_codes.get(code, code)
222
223            if code_str == 'AT_RANDOM':
224                value = self.__dummy_clemory.load(value, 0x10)
225            elif code_str in ('AT_EXECFN', 'AT_PLATFORM'):
226                pos = value
227                value = bytearray()
228                while True:
229                    byte = self.__dummy_clemory[pos]
230                    if byte == 0:
231                        break
232                    value.append(byte)
233                    pos += 1
234                value = bytes(value)
235
236            self.auxv[code_str] = value
237
    def __reload_children(self):
        """
        Load the files listed in ``filename_lookup`` as child objects and overlay them with the core's memory.

        For every file whose mappings can be read from the core, the file is loaded in isolation, its base address is
        inferred from the mapping addresses, and the mapped data is stored into the object's memory. Core segments
        that fall inside a loaded object are dropped or attached to it as backers; every remaining non-empty segment
        becomes a ``Blob`` child.

        If a file cannot be opened or its base address cannot be determined, a warning is logged, all children
        collected so far are discarded and the method returns early.

        Side effects on the loader: ``page_size`` is set to 0x1000 and ``_perform_relocations`` to False. If the
        loader had no main object, ``main_object`` is set to this object for the duration of the call and reset to
        None on every exit path.
        """
        # hacks start here
        self.loader.page_size = 0x1000
        self.loader._perform_relocations = False

        # hack: we are using a loader internal method in a non-kosher way which will cause our children to be
        # marked as the main binary if we are also the main binary
        # work around this by setting ourself here:
        if self.loader.main_object is None:
            self.loader.main_object = self

        # filename -> list of (vm_start, file offset, bytes stored in the core for that mapping)
        child_patches = defaultdict(list)
        for vm_start, vm_end, offset, filename in self.filename_lookup:
            try:
                patch_data = self.__dummy_clemory.load(vm_start, vm_end-vm_start)
            except KeyError:
                # the load failed for this mapping (presumably its contents are not in the core); skip it
                pass
            else:
                child_patches[filename].append((vm_start, offset, patch_data))

        # core segments that no child object has accounted for yet
        remaining_segments = list(self.segments)

        for filename, patches in child_patches.items():
            try:
                with open(filename, 'rb') as fp:
                    obj = self.loader._load_object_isolated(fp)
            except FileNotFoundError:
                l.warning("Could not load %s; core may be incomplete", filename)
                if self.loader.main_object is self:
                    self.loader.main_object = None
                self.child_objects.clear()
                return

            # several ways to try to match the NT_FILE entries to the object
            # (not trivial because offsets can be mapped multiple places)
            # (and because there's no clear pattern for how mappings are included or omitted)
            base_addr = None

            # try one: use the delta between each allocation as a signature (works when the text segment is missing)
            if base_addr is None:
                vm_starts = [a for a, _, _ in patches]
                vm_deltas = [b - a for a, b in zip(vm_starts, vm_starts[1:])]
                segment_starts = [seg.vaddr for seg in obj.segments]
                segment_deltas = [b - a for a, b in zip(segment_starts, segment_starts[1:])]

                # funky lil algorithm to find substrings:
                # look for the run of mapping deltas as a contiguous run inside the segment deltas
                for match_idx in range(len(segment_deltas) - len(vm_deltas) + 1):
                    for idx, vm_delta in enumerate(vm_deltas):
                        if vm_delta != segment_deltas[match_idx + idx]:
                            break
                    else:
                        # the first mapping corresponds to segment match_idx
                        base_addr = vm_starts[0] - AT.from_lva(obj.segments[match_idx].vaddr, obj).to_rva()
                        break

            # try two: if the file is identity-mapped, it's easy (?)
            # i.e. every mapping yields the same (address - file offset)
            if base_addr is None:
                base_reccomendations = [a - b for a, b, _ in patches]
                if all(a == base_reccomendations[0] for a in base_reccomendations):
                    base_addr = base_reccomendations[0]

            # try three: if we have the zero offset then it's easy (?)
            if base_addr is None:
                if patches[0][1] == 0:
                    base_addr = patches[0][0]

            if base_addr is None:
                l.warning("Could not load %s (could not determine base); core may be incomplete", filename)
                if self.loader.main_object is self:
                    self.loader.main_object = None
                self.child_objects.clear()
                return

            # store data provided by core into object
            for vaddr, _, patch in patches:
                try:
                    obj.memory.store(vaddr - base_addr, patch)
                except KeyError:
                    pass  # this case handled below in the inject clause, right???

            obj._custom_base_addr = base_addr
            self.child_objects.append(obj)

            # remove any core segments which are handled by this object
            for seg in obj.segments:
                addr = AT.from_lva(seg.vaddr, obj).to_rva() + base_addr
                # probe the object's segment in 0x1000-byte steps
                for subaddr in range(addr, addr + seg.memsize, 0x1000):
                    match_seg = self.find_segment_containing(subaddr)
                    if match_seg is not None:
                        try:
                            remaining_segments.remove(match_seg)
                        except ValueError:
                            # already removed by an earlier probe or an earlier object
                            pass

            # inject any core segments which are not handled by the object but overlap with it
            max_addr = base_addr + (obj.max_addr - obj.min_addr)
            i = 0
            # index-based loop because entries are popped while scanning
            while i < len(remaining_segments):
                seg = remaining_segments[i]
                if base_addr <= seg.vaddr <= max_addr or seg.vaddr <= base_addr < seg.vaddr + seg.memsize:
                    remaining_segments.pop(i)

                    seg_vaddr, backer = next(self.memory.backers(AT.from_mva(seg.vaddr, self).to_rva()))
                    assert seg_vaddr == AT.from_mva(seg.vaddr, self).to_rva()
                    obj.memory.add_backer(seg.vaddr - base_addr, backer)
                else:
                    i += 1

        # for all remaining segments, make blobs out of them
        mem = self.__dummy_clemory
        for seg in remaining_segments:
            if not seg.memsize:
                continue
            obj = Blob(self.binary, mem, segments=[(seg.vaddr, seg.vaddr, seg.memsize)], base_addr=seg.vaddr, arch=self.arch, entry_point=0, force_rebase=True)
            self.child_objects.append(obj)

        # NOTE(review): presumably the core itself no longer occupies address space once everything has been handed
        # to the children -- confirm against how the loader uses these fields
        self.mapped_base = 0
        self._max_addr = 0
        self.has_memory = False
        if self.loader.main_object is self:
            self.loader.main_object = None
358
359
360
# Auxiliary vector type codes mapped to their AT_* names.
auxv_codes = {
    0x00: 'AT_NULL',
    0x01: 'AT_IGNORE',
    0x02: 'AT_EXECFD',
    0x03: 'AT_PHDR',
    0x04: 'AT_PHENT',
    0x05: 'AT_PHNUM',
    0x06: 'AT_PAGESZ',
    0x07: 'AT_BASE',
    0x08: 'AT_FLAGS',
    0x09: 'AT_ENTRY',
    0x0a: 'AT_NOTELF',
    0x0b: 'AT_UID',
    0x0c: 'AT_EUID',
    0x0d: 'AT_GID',
    0x0e: 'AT_EGID',
    0x11: 'AT_CLKTCK',
    0x0f: 'AT_PLATFORM',
    0x10: 'AT_HWCAP',
    0x12: 'AT_FPUCW',
    0x13: 'AT_DCACHEBSIZE',
    0x14: 'AT_ICACHEBSIZE',
    0x15: 'AT_UCACHEBSIZE',
    0x16: 'AT_IGNOREPPC',
    0x17: 'AT_SECURE',
    0x18: 'AT_BASE_PLATFORM',
    0x19: 'AT_RANDOM',
    0x1a: 'AT_HWCAP2',
    0x1f: 'AT_EXECFN',
    0x20: 'AT_SYSINFO',
    0x21: 'AT_SYSINFO_EHDR',
    0x22: 'AT_L1I_CACHESHAPE',
    0x23: 'AT_L1D_CACHESHAPE',
    0x24: 'AT_L2_CACHESHAPE',
    0x25: 'AT_L3_CACHESHAPE',
    0x28: 'AT_L1I_CACHESIZE',
    0x29: 'AT_L1I_CACHEGEOMETRY',
    0x2a: 'AT_L1D_CACHESIZE',
    0x2b: 'AT_L1D_CACHEGEOMETRY',
    0x2c: 'AT_L2_CACHESIZE',
    0x2d: 'AT_L2_CACHEGEOMETRY',
    0x2e: 'AT_L3_CACHESIZE',
    0x2f: 'AT_L3_CACHEGEOMETRY',
}
404
# Register the ELFCore class under the backend name 'elfcore'.
register_backend('elfcore', ELFCore)
406