1#!/usr/local/bin/python3.8
2#
3# PLASMA : Generate an indented asm code (pseudo-C) with colored syntax.
4# Copyright (C) 2015    Joel
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program.    If not, see <http://www.gnu.org/licenses/>.
18#
19
20import bisect
21from time import time
22import subprocess
23
24from plasma.lib.utils import debug__, print_no_end, get_char, BYTES_PRINTABLE_SET
25from plasma.lib.colors import color_section
26
# Numeric identifiers for the kind of binary being handled (ELF, PE, raw
# blob, unknown).
# NOTE(review): presumably the values stored in Binary.type -- confirm
# against the code that assigns it.
T_BIN_ELF, T_BIN_PE, T_BIN_RAW, T_BIN_UNK = range(4)
31
32
class SectionAbs():
    """A named address range together with the bytes that back it.

    Attributes:
        name: section name (printed by print_header).
        start: first address of the section.
        virt_size: size of the mapped section in memory.
        real_size: size used to bound reads; presumably the number of
            bytes actually present in `data` -- confirm against the loaders.
        end: last mapped address (start + virt_size - 1).
        real_end: last readable address (start + real_size - 1).
        is_exec, is_data, is_bss: flags supplied by the creator.
        data: indexable/sliceable buffer; offset 0 corresponds to `start`.
        big_endian: byte order used by read_word/read_dword/read_qword.
    """

    # virt_size: size of the mapped section in memory
    def __init__(self, name, start, virt_size, real_size, is_exec, is_data, is_bss, data):
        self.name = name
        self.start = start
        self.virt_size = virt_size
        self.real_size = real_size
        self.end = start + virt_size - 1
        self.real_end = start + real_size - 1
        self.is_exec = is_exec
        self.is_data = is_data
        self.is_bss = is_bss
        self.data = data
        self.big_endian = False # set in lib.disassembler

    def print_header(self):
        """Print one line: padded colored name, then
        "[ start - end - virt_size - real_size ]" (addresses in hex)."""
        print_no_end(color_section(self.name.ljust(20)))
        print_no_end(" [ ")
        print_no_end(hex(self.start))
        print_no_end(" - ")
        print_no_end(hex(self.end))
        print_no_end(" - %d - %d" % (self.virt_size, self.real_size))
        print(" ]")

    def _is_readable(self, ad):
        """True when `ad` lies inside [start, real_end]."""
        return self.start <= ad <= self.real_end

    def _read_uint(self, ad, size):
        """Decode `size` bytes at `ad` as an unsigned integer.

        Returns None when `ad` is outside the readable range or fewer than
        `size` bytes are available. Byte order follows self.big_endian.
        """
        if not self._is_readable(ad):
            return None
        off = ad - self.start
        raw = self.data[off:off + size]
        if len(raw) != size:
            return None
        return int.from_bytes(raw, "big" if self.big_endian else "little")

    def read(self, ad, size):
        """Return up to `size` bytes starting at `ad`.

        The result is shorter than `size` when the buffer ends first, and
        b"" when `ad` is outside the readable range.
        """
        # An address below `start` would give a negative offset, which
        # Python slicing interprets relative to the end of the buffer.
        if not self._is_readable(ad):
            return b""
        off = ad - self.start
        return self.data[off:off + size]

    def read_int(self, ad, size):
        """Read an unsigned integer of 1, 2, 4 or 8 bytes at `ad`.

        Returns None for any other size or when the read is not possible.
        """
        if size == 1:
            return self.read_byte(ad)
        if size == 2:
            return self.read_word(ad)
        if size == 4:
            return self.read_dword(ad)
        if size == 8:
            return self.read_qword(ad)
        return None

    def read_byte(self, ad):
        """Return the element of `data` at `ad`, or None if not readable."""
        if not self._is_readable(ad):
            return None
        off = ad - self.start
        # The buffer may be shorter than real_size; answer None like the
        # multi-byte readers instead of raising IndexError.
        if off >= len(self.data):
            return None
        return self.data[off]

    def read_word(self, ad):
        """Return the 16-bit unsigned value at `ad`, or None."""
        return self._read_uint(ad, 2)

    def read_dword(self, ad):
        """Return the 32-bit unsigned value at `ad`, or None."""
        return self._read_uint(ad, 4)

    def read_qword(self, ad):
        """Return the 64-bit unsigned value at `ad`, or None."""
        return self._read_uint(ad, 8)
114
115
class SegmentAbs(SectionAbs):
    """Section-like object that also records a file offset.

    Unlike SectionAbs, the byte order is supplied at construction time and
    is_bss is always False.
    """

    def __init__(self, name, start, virt_size, real_size, is_exec, is_data,
                 data, file_offset, big_endian):
        # The parent initializer fills in the shared attributes (including
        # end and real_end); is_bss is fixed to False for segments.
        super().__init__(name, start, virt_size, real_size, is_exec, is_data,
                         False, data)
        self.file_offset = file_offset
        # The parent defaults big_endian to False; apply the caller's value.
        self.big_endian = big_endian
131
132
133
class Binary(object):
    """Base container for a loaded binary: sections, segments and symbols.

    Sections are stored in `_abs_sections` keyed by start address; the
    start addresses of executable/data sections are additionally kept in
    the sorted list `_sorted_sections` so that address lookups can use
    bisect. `is_big_endian` and `get_entry_point` raise NotImplementedError
    here, and the two `load_*_sym` methods do nothing.
    """

    def __init__(self):
        self.reverse_symbols = {} # ad -> name
        self.symbols = {} # name -> ad
        self.section_names = {}
        self.demangled = {} # name -> ad
        self.reverse_demangled = {} # ad -> name
        self.imports = {} # ad -> True (the bool is just for msgpack to save the database)
        self._abs_sections = {} # start section -> SectionAbs
        self._sorted_sections = [] # bisect list, contains section start address

        # for elf
        self._abs_segments = {}
        self._sorted_segments = []

        # To be compatible with CLE, used only in ELF
        self.rebase_addr = 0

        # It will be set in Disassembler !
        self.wordsize = 0
        self.type = -1

        # It will be set in Console !
        self.api = None


    @staticmethod
    def _find_containing(sorted_starts, by_start, ad):
        """Return the entry of `by_start` whose [start, end] range holds `ad`.

        `sorted_starts` is the ascending list of keys to search. Returns
        None when no entry contains `ad`.
        """
        i = bisect.bisect_right(sorted_starts, ad)
        if not i:
            # every indexed start is greater than ad
            return None
        candidate = by_start[sorted_starts[i - 1]]
        if ad <= candidate.end:
            return candidate
        return None


    def get_section(self, ad):
        """Return the indexed section containing `ad`, or None."""
        return self._find_containing(
                self._sorted_sections, self._abs_sections, ad)


    def add_section(self, start_address, name, virt_size, real_size,
                    is_exec, is_data, is_bss, data):
        """Create a SectionAbs and register it under `start_address`.

        Only sections flagged executable or data are added to the sorted
        index, so the address-based lookups ignore every other section;
        those remain reachable through get_section_by_name.
        """
        if is_exec or is_data:
            bisect.insort_left(self._sorted_sections, start_address)
        self._abs_sections[start_address] = SectionAbs(
                name,
                start_address,
                virt_size,
                real_size,
                is_exec,
                is_data,
                is_bss,
                data)


    # for elf
    def get_segment(self, ad):
        """Return the segment containing `ad`, or None."""
        return self._find_containing(
                self._sorted_segments, self._abs_segments, ad)


    def is_address(self, ad):
        """True when `ad` lies in an indexed section that does not start at 0."""
        s = self.get_section(ad)
        return s is not None and s.start != 0


    def get_next_section(self, ad):
        """Return the first indexed section starting after `ad`, or None."""
        i = bisect.bisect_right(self._sorted_sections, ad)
        if i >= len(self._sorted_sections):
            return None
        start = self._sorted_sections[i]
        s = self._abs_sections[start]
        # start > ad here, so this holds for any section whose end is not
        # below start - 1; kept as a guard against inconsistent sections.
        if ad <= s.end:
            return s
        return None


    def get_first_addr(self):
        """Return the lowest indexed section start.

        Raises IndexError when no section has been indexed.
        """
        return self._sorted_sections[0]


    def get_last_addr(self):
        """Return the end address of the indexed section with the highest start.

        Raises IndexError when no section has been indexed.
        """
        ad = self._sorted_sections[-1]
        return self._abs_sections[ad].end


    def read(self, ad, size):
        """Read up to `size` bytes at `ad`; b"" when `ad` is in no section."""
        s = self.get_section(ad)
        if s is None:
            return b""
        return s.read(ad, size)


    def read_byte(self, ad):
        """Read one byte at `ad`; None when `ad` is unmapped or not readable."""
        s = self.get_section(ad)
        # An unmapped address must not be dereferenced.
        if s is None or ad > s.real_end:
            return None
        return s.read_byte(ad)


    def rename_sym(self, name):
        """Return the first "<name>_<k>" (k = 0, 1, ...) absent from symbols."""
        count = 0
        while True:
            n = "%s_%d" % (name, count)
            if n not in self.symbols:
                return n
            count += 1


    # not optimized
    def get_section_by_name(self, name):
        """Return the first stored section named `name`, or None.

        Searches every stored section, indexed or not.
        """
        for s in self._abs_sections.values():
            if s.name == name:
                return s
        return None


    def get_prev_section(self, ad):
        """Return the indexed section preceding the one that contains `ad`.

        Returns None when `ad` is in no section or when its section is the
        first one.
        """
        s = self.get_section(ad)
        if s is None:
            return None
        i = bisect.bisect_right(self._sorted_sections, s.start - 1)
        if i == 0:
            return None
        start = self._sorted_sections[i - 1]
        return self._abs_sections[start]


    def iter_sections(self):
        """Yield the indexed sections in ascending start-address order."""
        for ad in self._sorted_sections:
            yield self._abs_sections[ad]


    # TODO : move in SectionAbs
    def get_string(self, addr, max_data_size=-1, s=None):
        """Return the printable, NUL-terminated string at `addr`.

        Args:
            addr: address of the first character.
            max_data_size: maximum number of characters to read; -1 means
                no limit. When the limit is reached "..." is appended.
            s: section to read from; looked up from `addr` when None.

        Returns:
            The string, or None when `addr` is in no section, when the
            string is empty, or when reading stopped on something other
            than a NUL byte (non-printable byte or end of the data).
        """
        if s is None:
            s = self.get_section(addr)
            if s is None:
                return None

        data = s.data
        off = addr - s.start
        txt = []

        c = 0
        i = 0
        while (i < max_data_size or max_data_size == -1) and off < len(data):
            c = data[off]
            if c == 0 or c not in BYTES_PRINTABLE_SET:
                break
            txt.append(get_char(c))
            off += 1
            i += 1

        if i == max_data_size:
            # NOTE(review): c is the last character read, so a string of
            # exactly max_data_size characters also gets the marker.
            if c != 0:
                txt.append("...")
        elif c != 0 or i == 0:
            return None

        return ''.join(txt)


    # Returns the size of the string or 0 if it's not an ascii string
    def is_string(self, addr, min_bytes=3, s=None):
        """Return the length of the string at `addr`, NUL byte included.

        Returns 0 when `addr` is in no section, when the printable run is
        not ended by a NUL byte, or when the length is below `min_bytes`.
        `s` is the section to read from; looked up from `addr` when None.
        """
        if s is None:
            s = self.get_section(addr)
            if s is None:
                return 0

        data = s.data
        off = addr - s.start
        n = 0
        c = 0
        while off < len(data):
            c = data[off]
            if c == 0:
                n += 1
                break
            if c in BYTES_PRINTABLE_SET:
                n += 1
            else:
                break
            off += 1

        # Accept only runs closed by a NUL byte whose length (NUL included)
        # reaches min_bytes.
        if c == 0 and n >= min_bytes:
            return n
        return 0


    def load_section_names(self):
        """Fill section_names (name -> start address) from every stored section."""
        # Used for the auto-completion
        for ad, sec in self._abs_sections.items():
            self.section_names[sec.name] = ad


    def demangle_symbols(self):
        """Demangle C++ symbols with the external `c++filt -p` command.

        Symbols starting with "_Z" or "__Z" are sent (without any "@@..."
        suffix) to c++filt; the results are stored in reverse_demangled
        (ad -> name) and demangled (name -> ad). Does nothing when there is
        no such symbol. Errors from starting the subprocess are not caught.
        """
        addr = []
        lookup_names = []
        for n, ad in self.symbols.items():
            if n.startswith(("_Z", "__Z")):
                addr.append(ad)
                lookup_names.append(n.split("@@")[0])

        if not addr:
            return

        # http://stackoverflow.com/questions/6526500/c-name-mangling-library-for-python
        args = ["c++filt", "-p"]
        pipe = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # Terminate the last name with a newline: a filter that echoes its
        # input would otherwise leave the last output line unterminated.
        payload = ('\n'.join(lookup_names) + '\n').encode('utf-8')
        stdout, _ = pipe.communicate(payload)
        # splitlines() yields one entry per name whether or not the output
        # ends with a newline, so the last name is never dropped.
        demangled = stdout.splitlines()

        self.reverse_demangled = {
            ad: raw.decode() for ad, raw in zip(addr, demangled)
        }

        for ad, n in self.reverse_demangled.items():
            self.demangled[n] = ad


    def load_static_sym(self):
        """No-op in this base class."""
        return


    def load_dyn_sym(self):
        """No-op in this base class."""
        return


    def is_big_endian(self):
        """Not implemented in this base class."""
        raise NotImplementedError


    def get_entry_point(self):
        """Not implemented in this base class."""
        raise NotImplementedError
374