1#!/usr/bin/env python
2## -*- coding: utf-8 -*-
3##
## An example of a small symbolic emulator for ELF x86-64 binaries.
## It only simulates the following libc functions (feel free to add
## more):
7##
8##  * __libc_start_main
9##  * atoi
10##  * atol
11##  * atoll
12##  * malloc
13##  * printf
14##  * putchar
15##  * puts
16##  * raise
17##  * rand
18##  * signal
19##  * strlen
20##  * strtoul
21##
22## Example:
23##
24##  $ ./small_x86-64_symbolic_emulator.py ./samples/sample_1 hello
25##  [Triton] Loading 0x400040 - 0x400270
26##  [Triton] Loading 0x400270 - 0x40028c
27##  [Triton] Loading 0x400000 - 0x4007a4
28##  [Triton] Loading 0x600e10 - 0x601048
29##  [Triton] Loading 0x600e28 - 0x600ff8
30##  [Triton] Loading 0x40028c - 0x4002ac
31##  [Triton] Loading 0x400678 - 0x4006ac
32##  [Triton] Loading 0x000000 - 0x000000
33##  [Triton] Loading 0x600e10 - 0x601000
34##  [Triton] Loading 0x000000 - 0x000000
35##  [Triton] Hooking strlen
36##  [Triton] Hooking printf
37##  [Triton] Hooking __libc_start_main
38##  [Triton] Starting emulation
39##  [Triton] __libc_start_main hooked
40##  [Triton] argv[0] = ./samples/sample_1
41##  [Triton] argv[1] = hello
42##  [Triton] strlen hooked
43##  [Triton] printf hooked
44##  Input size = 5
45##  [Triton] Instruction executed: 34
46##  [Triton] Emulation done
47##
48
49from __future__ import print_function
50from triton     import TritonContext, ARCH, MemoryAccess, CPUSIZE, Instruction, OPCODE, MODE
51
52import sys
53import string
54import random
55
# Global Triton context shared by every routine of this script
Triton = TritonContext()


# Script options
# When True, debug() prints its '[Triton] ...' trace messages
DEBUG = True

# Memory mapping
# Fake addresses assigned to the hooked libc routines by makeRelocation()
BASE_PLT   = 0x10000000
# Where __libc_start_main() stores the argv strings and the argv pointer table
BASE_ARGV  = 0x20000000
# Address of the first chunk returned by the simulated malloc()
BASE_ALLOC = 0x30000000
# Initial value given to rsp and rbp before the emulation starts
BASE_STACK = 0x9fffffff

# Signal handlers used by raise() and signal()
# Maps a signal number to the handler address registered through signal()
sigHandlers = dict()

# Allocation information used by malloc()
mallocCurrentAllocation = 0             # number of chunks handed out so far
mallocMaxAllocation     = 2048          # malloc() aborts once this many chunks exist
mallocBase              = BASE_ALLOC    # address of chunk number 0
mallocChunkSize         = 0x00010000    # spacing between chunks; bigger requests abort
76
77
78
def getMemoryString(addr):
    """Return the NUL-terminated string stored at `addr` in emulated memory.

    Bytes are read one at a time until a zero byte is found. Characters
    that are not in `string.printable` are skipped, so the returned string
    may be shorter than the number of bytes actually scanned.
    """
    chars = []
    offset = 0

    while True:
        byte = Triton.getConcreteMemoryValue(addr + offset)
        if not byte:
            # Terminating NUL reached
            break
        char = chr(byte)
        if char in string.printable:
            chars.append(char)
        offset += 1

    return "".join(chars)
90
91
def getFormatString(addr):
    """Read a C format string at `addr` and translate it for `str.format`.

    Only the conversions listed in the table below are recognised; they
    are substituted in table order with plain `str.replace`. Anything else
    (including literal braces or `%%`) is left untouched.

    The upper-case conversions `%02X` and `%lX` are mapped to upper-case
    Python specifiers so that the emitted hexadecimal digits keep their
    case.
    """
    conversions = (
        ("%s",    "{}"),
        ("%d",    "{:d}"),
        ("%#02x", "{:#02x}"),
        ("%#x",   "{:#x}"),
        ("%x",    "{:x}"),
        ("%02X",  "{:02X}"),
        ("%c",    "{:c}"),
        ("%02x",  "{:02x}"),
        ("%ld",   "{:d}"),
        ("%*s",   ""),          # dropped entirely: no placeholder is emitted
        ("%lX",   "{:X}"),
        ("%08x",  "{:08x}"),
        ("%u",    "{:d}"),
        ("%lu",   "{:d}"),
    )

    fmt = getMemoryString(addr)
    for cSpec, pySpec in conversions:
        fmt = fmt.replace(cSpec, pySpec)

    return fmt
99
100
101# Simulate the rand() function
def __rand():
    """Simulate rand(): return a pseudo-random integer in [0, 0x7fffffff].

    The result is kept within the positive range of a 32-bit signed int
    (glibc's RAND_MAX is 0x7fffffff) so that the emulated program never
    sees a value that would read as a negative `int`.
    """
    debug('rand hooked')
    # Return value (randrange excludes its upper bound)
    return random.randrange(0x80000000)
106
107
108# Simulate the malloc() function
def __malloc():
    """Simulate malloc() with a bump allocator made of fixed-size chunks.

    The requested size is read from rdi. Each call returns the next chunk
    of `mallocChunkSize` bytes starting at `mallocBase`. The script exits
    with status -1 when the request is larger than a chunk or when
    `mallocMaxAllocation` chunks have already been handed out.
    """
    global mallocCurrentAllocation

    debug('malloc hooked')

    # Get arguments
    requested = Triton.getConcreteRegisterValue(Triton.registers.rdi)

    failure = None
    if requested > mallocChunkSize:
        failure = 'malloc failed: size too big'
    elif mallocCurrentAllocation >= mallocMaxAllocation:
        failure = 'malloc failed: too many allocations done'

    if failure is not None:
        debug(failure)
        sys.exit(-1)

    chunk = mallocBase + mallocCurrentAllocation * mallocChunkSize
    mallocCurrentAllocation += 1

    # Return value
    return chunk
133
134
135# Simulate the signal() function
def __signal():
    """Simulate signal(): remember the handler address for a signal number.

    The signal number is read from rdi and the handler address from rsi;
    the pair is stored in `sigHandlers` for later use by raise(). The
    current content of rax is returned, so rax is left unchanged by the
    caller of this routine.
    """
    debug('signal hooked')

    regs = Triton.registers

    # Get arguments and register the handler
    signum = Triton.getConcreteRegisterValue(regs.rdi)
    sigHandlers[signum] = Triton.getConcreteRegisterValue(regs.rsi)

    # Return value (void)
    return Triton.getConcreteRegisterValue(regs.rax)
148
149
150# Simulate the raise() function
def __raise():
    """Simulate raise(): run the handler registered for the signal in rdi.

    Raises KeyError when no handler was registered through signal() for
    that signal number.
    """
    debug('raise hooked')

    # Get arguments
    signum = Triton.getConcreteRegisterValue(Triton.registers.rdi)
    target = sigHandlers[signum]

    # push 0: a zero return address is placed on the stack before entering
    # the handler. Presumably the handler's final `ret` pops it, which
    # makes pc zero and ends the nested emulate() loop.
    pushZero = Instruction(b"\x6A\x00")
    Triton.processing(pushZero)
    emulate(target)

    # Return value
    return 0
163
164
165# Simulate the strlen() function
def __strlen():
    """Simulate strlen(): count the bytes before the terminating NUL.

    The string address is read from rdi. Bytes are counted directly from
    emulated memory rather than through getMemoryString(), because that
    helper drops non-printable characters and would under-report the
    length of strings containing them.
    """
    debug('strlen hooked')

    # Get arguments
    addr = Triton.getConcreteRegisterValue(Triton.registers.rdi)

    length = 0
    while Triton.getConcreteMemoryValue(addr + length):
        length += 1

    # Return value
    return length
174
175
176# Simulate the strtoul() function
def __strtoul():
    """Simulate strtoul(): convert the string at rdi using the base in rdx.

    The conversion is delegated to Python's `int(text, base)`, which raises
    ValueError when the whole string is not a valid number in that base.
    The `endptr` argument (rsi) is read but nothing is written back to it.
    """
    debug('strtoul hooked')

    regs = Triton.registers

    # Get arguments
    text    = getMemoryString(Triton.getConcreteRegisterValue(regs.rdi))
    _endptr = Triton.getConcreteRegisterValue(regs.rsi)  # currently unused
    radix   = Triton.getConcreteRegisterValue(regs.rdx)

    # Return value
    return int(text, radix)
187
188
189# Simulate the printf() function
def __printf():
    """Simulate printf() for up to five integer or string arguments.

    The format string is read from rdi and translated by getFormatString();
    the arguments are taken from rsi, rdx, rcx, r8 and r9 in that order.
    Arguments passed on the stack or in floating-point registers are not
    supported. The formatted text is written to stdout.

    Returns the number of characters written.
    """
    debug('printf hooked')

    regs = Triton.registers

    # Get arguments
    fmt    = getFormatString(Triton.getConcreteRegisterValue(regs.rdi))
    values = [Triton.getConcreteRegisterValue(reg)
              for reg in (regs.rsi, regs.rdx, regs.rcx, regs.r8, regs.r9)]
    nbArgs = fmt.count("{")
    args   = values[:nbArgs]

    # getFormatString() turns "%s" (and only "%s") into a bare "{}". The
    # matching argument is a pointer, so replace it by the string it points
    # to instead of printing the address as a number.
    fields = [(fieldName, formatSpec)
              for (_, fieldName, formatSpec, _) in string.Formatter().parse(fmt)
              if fieldName is not None]
    for position, (fieldName, formatSpec) in zip(range(len(args)), fields):
        if fieldName == '' and formatSpec == '':
            args[position] = getMemoryString(args[position])

    s = fmt.format(*args)

    sys.stdout.write(s)

    # Return value
    return len(s)
208
209
210# Simulate the putchar() function
def __putchar():
    """Simulate putchar(): write the character held in rdi to stdout.

    As in C, the argument is converted to an unsigned char (low 8 bits),
    exactly that one character is written with no added newline, and the
    character written is returned.
    """
    debug('putchar hooked')

    # Get arguments (keep only the low byte, the rest of rdi is ignored)
    char = Triton.getConcreteRegisterValue(Triton.registers.rdi) & 0xff
    sys.stdout.write(chr(char))

    # Return value
    return char
220
221
222# Simulate the puts() function
def __puts():
    """Simulate puts(): print the string at rdi followed by a newline.

    Returns the number of characters written, newline included.
    """
    debug('puts hooked')

    # Get arguments
    text = getMemoryString(Triton.getConcreteRegisterValue(Triton.registers.rdi))
    line = text + '\n'
    sys.stdout.write(line)

    # Return value
    return len(line)
232
233
def __libc_start_main():
    """Simulate __libc_start_main(): set up argc/argv and jump to main().

    The address of main() is read from rdi and pushed on the stack, so that
    the simulated `ret` performed by hookingHandler() lands in main().
    The target's arguments are taken from `sys.argv[1:]` (the binary path
    becomes argv[0]); each one is written as a NUL-terminated UTF-8 string
    starting at BASE_ARGV and every byte of it is tainted. The argv pointer
    table is stored right after the strings. On return, rdi holds argc and
    rsi the address of the pointer table.

    All offsets are computed from the encoded byte length of each argument,
    so arguments containing non-ASCII characters neither overlap the next
    string nor end up only partially tainted.
    """
    debug('__libc_start_main hooked')

    # Get arguments
    main = Triton.getConcreteRegisterValue(Triton.registers.rdi)

    # Push the address of main() as return address
    rsp = Triton.getConcreteRegisterValue(Triton.registers.rsp) - CPUSIZE.QWORD
    Triton.setConcreteRegisterValue(Triton.registers.rsp, rsp)
    Triton.setConcreteMemoryValue(MemoryAccess(rsp, CPUSIZE.QWORD), main)

    # Setup argc / argv
    Triton.concretizeRegister(Triton.registers.rdi)
    Triton.concretizeRegister(Triton.registers.rsi)

    # Setup target argvs
    argvs = sys.argv[1:]

    # Define argc / argv
    base  = BASE_ARGV
    addrs = list()

    for index, arg in enumerate(argvs):
        raw = bytes(arg.encode('utf8'))

        addrs.append(base)
        Triton.setConcreteMemoryAreaValue(base, raw + b'\x00')

        # Tainting argvs (every encoded byte, terminator excluded)
        for offset in range(len(raw)):
            Triton.taintMemory(base + offset)

        base += len(raw) + 1
        debug('argv[%d] = %s' %(index, arg))

    # The pointer table starts right after the last string
    argc      = len(argvs)
    argvTable = base
    for addr in addrs:
        Triton.setConcreteMemoryValue(MemoryAccess(base, CPUSIZE.QWORD), addr)
        base += CPUSIZE.QWORD

    Triton.setConcreteRegisterValue(Triton.registers.rdi, argc)
    Triton.setConcreteRegisterValue(Triton.registers.rsi, argvTable)

    return 0
280
281
# Shared parser for atoi() / atol() / atoll()
def parseLeadingInt(text):
    """Parse the decimal integer at the start of `text`, C `atoi` style.

    Leading whitespace is skipped, an optional sign is accepted and parsing
    stops at the first non-digit character. Returns 0 when `text` does not
    start with a number, instead of raising like `int()` would.
    """
    import re

    match = re.match(r'[ \t\n\v\f\r]*([+-]?[0-9]+)', text)
    if match is None:
        return 0
    return int(match.group(1))


# Simulate the atoi() function
def __atoi():
    """Simulate atoi() on the string pointed to by rdi.

    The result is wrapped to 64 bits so that a negative number is returned
    in two's-complement form, ready to be written into rax.
    """
    debug('atoi hooked')

    # Get arguments
    arg1 = getMemoryString(Triton.getConcreteRegisterValue(Triton.registers.rdi))

    # Return value
    return parseLeadingInt(arg1) & 0xffffffffffffffff


# Simulate the atol() function
def __atol():
    """Simulate atol() on the string pointed to by rdi.

    The result is wrapped to 64 bits so that a negative number is returned
    in two's-complement form, ready to be written into rax.
    """
    debug('atol hooked')

    # Get arguments
    arg1 = getMemoryString(Triton.getConcreteRegisterValue(Triton.registers.rdi))

    # Return value
    return parseLeadingInt(arg1) & 0xffffffffffffffff


# Simulate the atoll() function
def __atoll():
    """Simulate atoll() on the string pointed to by rdi.

    The result is wrapped to 64 bits so that a negative number is returned
    in two's-complement form, ready to be written into rax.
    """
    debug('atoll hooked')

    # Get arguments
    arg1 = getMemoryString(Triton.getConcreteRegisterValue(Triton.registers.rdi))

    # Return value
    return parseLeadingInt(arg1) & 0xffffffffffffffff
313
314
# Table of the libc routines simulated by this script. Each entry is a
# mutable list of three items:
#   [0] the imported symbol name looked up in the binary's PLT/GOT relocations
#   [1] the Python function simulating it
#   [2] the fake address of the routine; None until makeRelocation() fills it
#       in with BASE_PLT + the entry's index in this table
# hookingHandler() compares item [2] with rip to detect a call to a routine.
customRelocation = [
    ['__libc_start_main', __libc_start_main,    None],
    ['atoi',              __atoi,               None],
    ['atol',              __atol,               None],
    ['atoll',             __atoll,              None],
    ['malloc',            __malloc,             None],
    ['printf',            __printf,             None],
    ['putchar',           __putchar,            None],
    ['puts',              __puts,               None],
    ['raise',             __raise,              None],
    ['rand',              __rand,               None],
    ['signal',            __signal,             None],
    ['strlen',            __strlen,             None],
    ['strtoul',           __strtoul,            None],
]
330
331
def hookingHandler():
    """Run the simulated routine if rip points to one of the fake stubs.

    When the current rip equals the address stored in a `customRelocation`
    entry, the associated Python routine is called, its result is written
    to rax, and a `ret` is simulated: the return address is read from the
    top of the stack, placed in rip, and rsp is moved up by one qword.
    """
    regs = Triton.registers
    pc = Triton.getConcreteRegisterValue(regs.rip)

    for _name, routine, stub in customRelocation:
        if stub != pc:
            continue

        # Emulate the routine and the return value
        Triton.setConcreteRegisterValue(regs.rax, routine())

        # Get the return address (rsp is read after the routine ran, since
        # the routine itself may have moved the stack pointer)
        stack = Triton.getConcreteRegisterValue(regs.rsp)
        retAddr = Triton.getConcreteMemoryValue(MemoryAccess(stack, CPUSIZE.QWORD))

        # Hijack RIP to skip the call
        Triton.setConcreteRegisterValue(regs.rip, retAddr)

        # Restore RSP (simulate the ret)
        Triton.setConcreteRegisterValue(regs.rsp, stack + CPUSIZE.QWORD)
349
350
351# Emulate the binary.
def emulate(pc):
    """Emulate instructions starting at `pc`.

    The loop stops when the program counter becomes zero or right after a
    HLT instruction has been processed. After every other instruction,
    hookingHandler() is given the chance to simulate a hooked routine.
    The number of processed instructions is reported through debug().
    """
    executed = 0

    while pc:
        # Build the instruction from the 16 bytes located at pc
        inst = Instruction()
        inst.setOpcode(Triton.getConcreteMemoryAreaValue(pc, 16))
        inst.setAddress(pc)

        # Process
        Triton.processing(inst)
        executed += 1

        if inst.getType() == OPCODE.X86.HLT:
            break

        # Simulate routines
        hookingHandler()

        # Next
        pc = Triton.getConcreteRegisterValue(Triton.registers.rip)

    debug('Instruction executed: %d' %(executed))
380
381
def loadBinary(path):
    """Parse the binary at `path` with lief and map it into Triton's memory.

    The content of every segment is copied to the segment's virtual
    address. Returns the parsed lief binary object.
    """
    import lief

    binary = lief.parse(path)

    # Map the binary into the memory, one segment at a time
    for segment in binary.segments:
        start = segment.virtual_address
        end   = start + segment.physical_size
        debug('Loading 0x%06x - 0x%06x' %(start, end))
        Triton.setConcreteMemoryAreaValue(start, segment.content)

    return binary
393
394
def makeRelocation(binary):
    """Redirect the binary's imported routines to the simulated ones.

    Every `customRelocation` entry first receives a fake address
    (BASE_PLT + its index in the table). Then, for each PLT/GOT relocation
    of `binary` whose symbol name matches an entry, that fake address is
    written at the relocation's address.
    """
    # Setup plt
    for slot, entry in enumerate(customRelocation):
        entry[2] = BASE_PLT + slot

    # Perform our own relocations
    for rel in binary.pltgot_relocations:
        name    = rel.symbol.name
        gotSlot = rel.address

        # First table entry carrying this symbol name, if any
        hook = next((entry for entry in customRelocation if entry[0] == name), None)
        if hook is None:
            continue

        debug('Hooking %s' %(name))
        Triton.setConcreteMemoryValue(MemoryAccess(gotSlot, CPUSIZE.QWORD), hook[2])
410
411
def debug(s):
    """Print `s` prefixed with '[Triton] ' when the DEBUG option is set."""
    if not DEBUG:
        return
    print('[Triton] %s' % s)
416
417
# Script entry point: configure Triton, load the ELF binary given as first
# argument, hook the supported libc routines and emulate from the entry point.
if __name__ == '__main__':
    # Set the architecture
    Triton.setArchitecture(ARCH.X86_64)

    # Set a symbolic optimization mode
    Triton.setMode(MODE.ALIGNED_MEMORY, True)

    # AST representation as Python syntax
    #setAstRepresentationMode(AST_REPRESENTATION.PYTHON)

    # NOTE(review): the usage message goes through debug(), so nothing is
    # printed when DEBUG is False; the script then exits silently with 1.
    if len(sys.argv) < 2:
        debug('Syntax: %s <elf binary> [arg1, arg2, ...]' %(sys.argv[0]))
        sys.exit(1)

    # Load the binary
    binary = loadBinary(sys.argv[1])

    # Perform our own relocations
    makeRelocation(binary)

    # Define a fake stack (rbp and rsp both start at BASE_STACK)
    Triton.setConcreteRegisterValue(Triton.registers.rbp, BASE_STACK)
    Triton.setConcreteRegisterValue(Triton.registers.rsp, BASE_STACK)

    # Let's emulate the binary from the entry point
    debug('Starting emulation')
    emulate(binary.entrypoint)
    debug('Emulation done')

    sys.exit(0)
448