1# coding=utf-8
2# Copyright 2018 Sascha Schirra
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are met:
6#
7# 1. Redistributions of source code must retain the above copyright notice, this
8# list of conditions and the following disclaimer.
9#
10# 2. Redistributions in binary form must reproduce the above copyright notice,
11# this list of conditions and the following disclaimer in the documentation
12# and/or other materials provided with the distribution.
13#
14# 3. Neither the name of the copyright holder nor the names of its contributors
15# may be used to endorse or promote products derived from this software without
16# specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" A ND
19# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29import re
30from ropper.common.error import RopperError
31
32class ConstraintCompiler(object):
33    """
34    Compile a user given constraints to z3 expressions
35
36    constraint := assignment | pop_reg
37    assignment := reg, adjust, reg | number
38    pop_reg := "pop", reg
39    adjust := "==" | "+=" | "-=" | "*=" | "/="
40    reg := a register of the current architecture
41    number := int
42    """
43
44    NUMBER_REGEX = '(-?[0-9]+)'
45    REG_REGEX = '(?P<{}>[a-zA-Z0-9]+)'
46    ADJUST_REGEX = '([\\+\-\*/=]=)'
47    ASSIGNMENT_REGEX = '('+REG_REGEX.format('reg_dst_1') + ' *' + ADJUST_REGEX + ' *('+NUMBER_REGEX+'|'+REG_REGEX.format('reg_src_1')+'|(\[)'+REG_REGEX.format('reg_src_2')+'(\])))'
48    POP_REGEX = '((pop) +'+REG_REGEX.format('reg_dst_2')+')'
49    CONSTRAINT_REGEX = '(' + ASSIGNMENT_REGEX + '|' + POP_REGEX + ')'
50
51    def __init__(self, architecture, semantic_info):
52        self.__architecture = architecture
53        self.__semantic_info = semantic_info
54
55    def getSymbols(self, constraints):
56        symbols = []
57        for constraint in constraints:
58            match = re.match(ConstraintCompiler.CONSTRAINT_REGEX, constraint)
59            if match is None:
60                raise Exception('Invalid syntax: %s' % constraint)
61            reg_dst = match.group('reg_dst_1')
62            if reg_dst is not None:
63                reg_src = match.group('reg_src_1')
64                reg_src = match.group('reg_src_2') if reg_src is None else reg_src
65                symbols.append((reg_dst, reg_src))
66            else:
67                symbols.append((match.group('reg_dst_2'), None))
68
69        return symbols
70
71
72    def compile(self, constraints):
73        """
74        compile a line of semantic expressions
75        """
76        tokens = self._tokenize(constraints)[::-1]
77        to_return = None
78        constraint = None
79        while True:
80            if not tokens:
81                break
82
83            token = tokens.pop()
84            if token in self.__architecture.info.registers:
85                constraint = self._assignment(token, tokens)
86            elif token == 'pop':
87                constraint = self._popReg(token, tokens)
88            elif token == ';':
89                if to_return is None:
90                    to_return = constraint
91                else:
92                    to_return = 'And(%s, %s)' % (to_return, constraint)
93            else:
94                raise ConstraintError('Invalid token: %s' % token)
95
96        return to_return
97
98    def _tokenize(self, constraints):
99        """
100        return a list of tokens
101        """
102        tokens = []
103        for constraint in constraints.split(';'):
104            constraint = constraint.strip()
105            if not constraint:
106                continue
107            match = re.match(ConstraintCompiler.CONSTRAINT_REGEX, constraint)
108            if match is None:
109                raise ConstraintError('Invalid Syntax: %s' % constraint)
110            last_valid_index = -1
111            for index in range(1, len(match.regs)):
112                start = match.regs[index][0]
113                if start == -1:
114                    continue
115                if last_valid_index == -1:
116                    last_valid_index = index
117                    continue
118                if match.regs[last_valid_index][0] != start:
119                    tokens.append(match.group(last_valid_index))
120                last_valid_index = index
121            tokens.append(match.group(last_valid_index))
122            tokens.append(';')
123        return tokens
124
125    def _assignment(self, register, tokens):
126        register = self.__architecture.getRegisterName(register)
127        reg1_last = self.__semantic_info.regs[register][-1]
128        reg1_init = self.__semantic_info.regs[register][0]
129        op = tokens.pop()
130        if not re.match(ConstraintCompiler.ADJUST_REGEX, op):
131            raise ConstraintError('Invalid syntax: %s' % op)
132        value = tokens.pop()
133        if value == '[':
134            r1 = register
135            register = tokens.pop()
136
137            register_name = self.__architecture.getRegisterName(register)
138            if not register_name:
139                raise ConstraintError('Invalid register: %s' & register)
140            value = self._readMemory(register_name)
141            tokens.pop()
142        elif re.match(ConstraintCompiler.NUMBER_REGEX, value):
143            value = create_number_expression(int(value), int(reg1_last.split('_')[-1]))
144
145        elif value in self.__architecture.info.registers:
146            value = self.__architecture.getRegisterName(value)
147            value = self.__semantic_info.regs[value][0]
148            value = create_register_expression(value, int(value.split('_')[-1]))
149        else:
150            print(re.match(ConstraintCompiler.NUMBER_REGEX, value))
151            raise ConstraintError('Invalid Assignment: %s%s%s' % (register, op, value))
152        reg1_last = create_register_expression(reg1_last, int(reg1_last.split('_')[-1]))
153        reg1_init = create_register_expression(reg1_init, int(reg1_init.split('_')[-1]))
154        return self._create(reg1_last, reg1_init, value, op[0])
155
156    def _create(self, left_last, left_init, right, adjust):
157        if adjust != '=':
158            return '%s == %s %s %s' % (left_last, left_init, adjust, right)
159        else:
160            return '%s == %s' % (left_last, right)
161
162    def _readMemory(self, register):
163        register_init = self.__semantic_info.regs[register][0]
164        if self.__semantic_info.mems:
165            memory = self.__semantic_info.mems[-1]
166        else:
167            memory = 'memory%d_%d_%d' % (0, self.__architecture.info.bits, 8)
168            self.__semantic_info.mems.append(memory)
169        size = int(register_init.split('_')[-1])
170        register_expr = create_register_expression(register_init, size)
171        mem_expr = create_read_memory_expression(memory, register_expr, size)
172        return mem_expr
173
174    def _popReg(self, pop, tokens):
175        reg_name = tokens.pop()
176        self.symbols.append((reg_name,None))
177        reg = self.__semantic_info.regs[reg_name][-1]
178        if self.__semantic_info.mems:
179            memory = self.__semantic_info.mems[0]
180        else:
181            memory = 'memory%d_%d_%d' % (0, self.__architecture.info.bits, 8)
182            self.__semantic_info.mems.append(memory)
183        size = int(reg.split('_')[-1])
184        register_expr = create_register_expression(reg, size)
185        mem_expr = create_read_memory_expression(memory, register_expr, size)
186        return mem_expr
187
188
189class ConstraintError(RopperError):
190    """
191    ConstraintError
192    """
193    pass
194
195
196def create_register_expression(register_accessor, size, high=False):
197    register_size = int(register_accessor.split('_')[2])
198    if size < register_size:
199        if high:
200            return 'Extract(%d, 8, %s)' % (size+8-1, register_accessor)
201        else:
202            return 'Extract(%d, 0, %s)' % (size-1, register_accessor)
203    else:
204        return '%s' % register_accessor
205
206def create_number_expression(number, size):
207    return "BitVecVal(%d, %d)" % (number, size)
208
209def create_read_memory_expression(memory, addr, size):
210    to_return = '%s[%s]' % (memory, addr)
211    for i in range(1, int(size/8)):
212        value = '%s[%s]' % (memory, '%s + %d' % (addr, i))
213        to_return = 'Concat(%s, %s)' % (value, to_return)
214
215    return to_return
216