1import logging
2import cffi
3
4import cle
5from cle.backends.externs import KernelObject, ExternObject
6from cle.backends.tls.elf_tls import ELFTLSObject
7
8from sortedcontainers import SortedDict
9
10from ...knowledge_plugins.cfg.memory_data import MemoryDataSort, MemoryData
11from ..analysis import Analysis
12
# Module-level logger, named after this module.
_l = logging.getLogger(name=__name__)
14
15
class CFBlanketView:
    """
    A view into the control-flow blanket.

    Indexing the view with a slice returns a generator over objects stored in the underlying blanket, starting from
    the object at the blanket address returned by ``floor_addr(slice.start)``. The traversal is unfinished: after the
    first object has been yielded, advancing the generator again raises NotImplementedError.
    """
    def __init__(self, cfb):
        """
        :param cfb: The blanket to look into. It must provide ``floor_addr(addr)`` and item access by address.
        """
        self._cfb = cfb

    def __getitem__(self, item):
        """
        Iterate over the blanket objects selected by `item`.

        This is a generator function, so nothing is looked up until the result is iterated. Any `item` that is not a
        slice produces an empty generator.

        :param item:                    A slice whose ``start`` is the address to begin at.
        :return:                        A generator of objects stored in the blanket.
        :raises NotImplementedError:    When the generator is advanced past the first object.
        """

        if isinstance(item, slice):
            addr = item.start
            start_addr = self._cfb.floor_addr(addr)

            addr_ = start_addr
            while True:
                obj = self._cfb[addr_]
                yield obj

                # Advance by the size of the object; the object itself cannot be added to an integer address.
                addr_ += obj.size
                # Find gaps
                # TODO: finish it
                raise NotImplementedError()
38
39
40#
41# Memory region
42#
43
44
class MemoryRegion:
    """
    A contiguous memory region: its start address, its size, its region type, the object it belongs to and the
    loader-level region it was created from.
    """
    def __init__(self, addr, size, type_, object_, cle_region):
        self.addr, self.size = addr, size
        self.type = type_
        self.object, self.cle_region = object_, cle_region

    def __repr__(self):
        # the printed range is [addr, addr + size)
        end = self.addr + self.size
        return "<MemoryRegion %#x-%#x, type %s>" % (self.addr, end, self.type)
55
56#
57# An address can be mapped to one of the following types of object
58# - Block
59# - MemoryData
60# - Unknown
61#
62
63
class Unknown:
    """
    A range of bytes in the blanket that is neither a block nor a memory data entry.

    Besides its address and size, an Unknown can optionally carry the raw bytes of the range and the object, segment
    and section it was found in.
    """
    def __init__(self, addr, size, bytes_=None, object_=None, segment=None, section=None):
        # an empty unknown region makes no sense; refuse it right away
        if size == 0:
            raise Exception("You cannot create an unknown region of size 0.")

        self.addr, self.size = addr, size

        # Optional
        self.bytes, self.object = bytes_, object_
        self.segment, self.section = segment, section

    def __repr__(self):
        return "<Unknown %#x-%#x>" % (self.addr, self.addr + self.size)
81
82
83class CFBlanket(Analysis):
84    """
85    A Control-Flow Blanket is a representation for storing all instructions, data entries, and bytes of a full program.
86
87    Region types:
88    - section
89    - segment
90    - extern
91    - tls
92    - kernel
93    """
94    def __init__(self, exclude_region_types=None):
95        self._blanket = SortedDict()
96
97        self._regions = [ ]
98        self._exclude_region_types = set() if not exclude_region_types else exclude_region_types
99
100        self._init_regions()
101
102        # initialize
103        for func in self.kb.functions.values():
104            self.add_function(func)
105        self._mark_memory_data()
106        self._mark_unknowns()
107
108    def _init_regions(self):
109
110        for obj in self.project.loader.all_objects:
111            if isinstance(obj, cle.MetaELF):
112                if obj.sections:
113                    if "section" not in self._exclude_region_types:
114                        # Enumerate sections in an ELF file
115                        for section in obj.sections:
116                            if section.occupies_memory:
117                                mr = MemoryRegion(section.vaddr, section.memsize, 'section', obj, section)
118                                self._regions.append(mr)
119                else:
120                    raise NotImplementedError("Currently ELFs without sections are not supported. Please implement or "
121                                              "complain on GitHub.")
122            elif isinstance(obj, cle.PE):
123                if obj.sections:
124                    if "section" not in self._exclude_region_types:
125                        for section in obj.sections:
126                            mr = MemoryRegion(section.vaddr, section.memsize, 'section', obj, section)
127                            self._regions.append(mr)
128                else:
129                    raise NotImplementedError("Currently PEs without sections are not supported. Please report to "
130                                              "GitHub and provide an example binary.")
131            elif isinstance(obj, KernelObject):
132                if "kernel" not in self._exclude_region_types:
133                    size = obj.max_addr - obj.min_addr
134                    mr = MemoryRegion(obj.min_addr, size, "kernel", obj, None)
135                    self._regions.append(mr)
136            elif isinstance(obj, ExternObject):
137                if "extern" not in self._exclude_region_types:
138                    size = obj.max_addr - obj.min_addr
139                    mr = MemoryRegion(obj.min_addr, size, "extern", obj, None)
140                    self._regions.append(mr)
141            elif isinstance(obj, ELFTLSObject):
142                if "tls" not in self._exclude_region_types:
143                    size = obj.max_addr - obj.min_addr
144                    mr = MemoryRegion(obj.min_addr, size, "tls", obj, None)
145                    self._regions.append(mr)
146            else:
147                if hasattr(obj, "size"):
148                    size = obj.size
149                else:
150                    size = obj.max_addr - obj.min_addr
151                type_ = "TODO"
152                mr = MemoryRegion(obj.min_addr, size, type_, obj, obj)
153                self._regions.append(mr)
154
155        # Sort them just in case
156        self._regions = list(sorted(self._regions, key=lambda x: x.addr))
157
    @property
    def regions(self):
        """
        Return all memory regions.

        :return:    The internal list of MemoryRegion objects (not a copy). `_init_regions` leaves it sorted by start
                    address.
        :rtype:     list
        """

        return self._regions
165
166    def floor_addr(self, addr):
167        try:
168            return next(self._blanket.irange(maximum=addr, reverse=True))
169        except StopIteration:
170            raise KeyError(addr)
171
    def floor_item(self, addr):
        """
        Get the blanket entry with the greatest address that is less than or equal to `addr`.

        :param addr:        The address to look up.
        :return:            A tuple of (address, object).
        :raises KeyError:   If `floor_addr` finds no address at or below `addr`.
        """
        key = self.floor_addr(addr)
        return key, self._blanket[key]
175
176    def floor_items(self, addr=None, reverse=False):
177        if addr is None:
178            start_addr = None
179        else:
180            try:
181                start_addr = next(self._blanket.irange(maximum=addr, reverse=True))
182            except StopIteration:
183                start_addr = addr
184
185        for key in self._blanket.irange(minimum=start_addr, reverse=reverse):
186            yield key, self._blanket[key]
187
188    def ceiling_addr(self, addr):
189        try:
190            return next(self._blanket.irange(minimum=addr))
191        except StopIteration:
192            raise KeyError(addr)
193
    def ceiling_item(self, addr):
        """
        Get the blanket entry with the smallest address that is greater than or equal to `addr`.

        :param addr:        The address to look up.
        :return:            A tuple of (address, object).
        :raises KeyError:   If `ceiling_addr` finds no address at or above `addr`.
        """
        key = self.ceiling_addr(addr)
        return key, self._blanket[key]
197
198    def ceiling_items(self, addr=None, reverse=False, include_first=True):
199        if addr is None:
200            start_addr = None
201        else:
202            try:
203                start_addr = next(self._blanket.irange(minimum=addr))
204            except StopIteration:
205                start_addr = addr
206
207        for key in self._blanket.irange(maximum=start_addr if include_first else start_addr - 1, reverse=reverse):
208            yield key, self._blanket[key]
209
    def __getitem__(self, addr):
        """
        Get the object stored in the blanket at exactly `addr`.

        :param addr:    The address to look up.
        :return:        The object stored at that address.
        """
        return self._blanket[addr]
212
    def add_obj(self, addr, obj):
        """
        Adds an object `obj` to the blanket at the specified address `addr`. An object already stored at that address
        is replaced.

        :param addr:    The address to store the object at.
        :param obj:     The object to store.
        :return:        None
        """
        self._blanket[addr] = obj
218
    def add_function(self, func):
        """
        Add a function `func` and all blocks of this function to the blanket.

        Every block in `func.blocks` is stored at its own address via `add_obj`.

        :param func:    The function whose blocks are added.
        :return:        None
        """
        for block in func.blocks:
            self.add_obj(block.addr, block)
225
226    def dbg_repr(self):
227        """
228        The debugging representation of this CFBlanket.
229
230        :return:    The debugging representation of this CFBlanket.
231        :rtype:     str
232        """
233
234        output = [ ]
235
236        for obj in self.project.loader.all_objects:
237            for section in obj.sections:
238                if section.memsize == 0:
239                    continue
240                min_addr, max_addr = section.min_addr, section.max_addr
241                output.append("### Object %s" % repr(section))
242                output.append("### Range %#x-%#x" % (min_addr, max_addr))
243
244                pos = min_addr
245                while pos < max_addr:
246                    try:
247                        addr, thing = self.floor_item(pos)
248                        output.append("%#x: %s" % (addr, repr(thing)))
249
250                        if thing.size == 0: pos += 1
251                        else: pos += thing.size
252                    except KeyError:
253                        pos += 1
254
255                output.append("")
256
257        return "\n".join(output)
258
259    def _mark_memory_data(self):
260        """
261        Mark all memory data.
262
263        :return: None
264        """
265        if 'CFGFast' not in self.kb.cfgs:
266            return
267        cfg_model = self.kb.cfgs['CFGFast']
268
269        for addr, memory_data in cfg_model.memory_data.items():
270            memory_data : MemoryData
271            if memory_data.sort == MemoryDataSort.CodeReference:
272                # skip Code Reference
273                continue
274            self.add_obj(addr, memory_data)
275
276    def _mark_unknowns(self):
277        """
278        Mark all unmapped regions.
279
280        :return: None
281        """
282
283        for obj in self.project.loader.all_objects:
284            if isinstance(obj, cle.ELF):
285                # sections?
286                if obj.sections and "section" not in self._exclude_region_types:
287                    for section in obj.sections:
288                        if not section.memsize or not section.vaddr:
289                            continue
290                        min_addr, max_addr = section.min_addr, section.max_addr
291                        self._mark_unknowns_core(min_addr, max_addr + 1, obj=obj, section=section)
292                elif obj.segments and "segment" not in self._exclude_region_types:
293                    for segment in obj.segments:
294                        if not segment.memsize:
295                            continue
296                        min_addr, max_addr = segment.min_addr, segment.max_addr
297                        self._mark_unknowns_core(min_addr, max_addr + 1, obj=obj, segment=segment)
298                else:
299                    # is it empty?
300                    _l.warning("Empty ELF object %s.", repr(obj))
301            elif isinstance(obj, cle.PE):
302                if obj.sections:
303                    for section in obj.sections:
304                        if not section.memsize:
305                            continue
306                        min_addr, max_addr = section.min_addr, section.max_addr
307                        self._mark_unknowns_core(min_addr, max_addr + 1, obj=obj, section=section)
308                else:
309                    # is it empty?
310                    _l.warning("Empty PE object %s.", repr(obj))
311            elif isinstance(obj, ELFTLSObject):
312                if "tls" in self._exclude_region_types:
313                    # Skip them for now
314                    pass
315                else:
316                    min_addr, max_addr = obj.min_addr, obj.max_addr
317                    self._mark_unknowns_core(min_addr, max_addr + 1, obj=obj)
318            elif isinstance(obj, KernelObject):
319                if "kernel" in self._exclude_region_types:
320                    # skip
321                    pass
322                else:
323                    min_addr, max_addr = obj.min_addr, obj.max_addr
324                    self._mark_unknowns_core(min_addr, max_addr + 1, obj=obj)
325            else:
326                min_addr, max_addr = obj.min_addr, obj.max_addr
327                self._mark_unknowns_core(min_addr, max_addr + 1, obj=obj)
328
329    def _mark_unknowns_core(self, min_addr, max_addr, obj=None, segment=None, section=None):
330
331        # The region should be [min_addr, max_addr)
332
333        try:
334            addr = self.floor_addr(min_addr)
335            if addr < min_addr:
336                raise KeyError
337        except KeyError:
338            # there is no other lower address
339            try:
340                next_addr = self.ceiling_addr(min_addr)
341                if next_addr >= max_addr:
342                    raise KeyError
343            except KeyError:
344                next_addr = max_addr
345
346            size = next_addr - min_addr
347            if obj is None or isinstance(obj, cle.ExternObject):
348                bytes_ = None
349            else:
350                try:
351                    _l.debug("Loading bytes from object %s, section %s, segmeng %s, addresss %#x.",
352                             obj, section, segment, min_addr)
353                    bytes_ = self.project.loader.memory.load(min_addr, size)
354                except KeyError:
355                    # The address does not exist
356                    bytes_ = None
357            self.add_obj(min_addr,
358                         Unknown(min_addr, size, bytes_=bytes_, object_=obj, segment=segment, section=section)
359                         )
360
361        addr = min_addr
362        while addr < max_addr:
363            last_addr, last_item = self.floor_item(addr)
364            if last_addr < min_addr:
365                # impossible
366                raise Exception('Impossible')
367
368            if last_item.size == 0:
369                # Make sure everything has a non-zero size
370                last_item_size = 1
371            else:
372                last_item_size = last_item.size
373            end_addr = last_addr + last_item_size
374            if end_addr < max_addr:
375                try:
376                    next_addr = self.ceiling_addr(end_addr)
377                except KeyError:
378                    next_addr = max_addr
379                if next_addr > end_addr:
380                    # there is a gap
381                    size = next_addr - end_addr
382                    if obj is None or isinstance(obj, cle.ExternObject):
383                        bytes_ = None
384                    else:
385                        try:
386                            _l.debug("Loading bytes from object %s, section %s, segmeng %s, addresss %#x.",
387                                     obj, section, segment, next_addr)
388                            bytes_ = self.project.loader.memory.load(next_addr, size)
389                        except KeyError:
390                            # The address does not exist
391                            bytes_ = None
392                    self.add_obj(end_addr,
393                                 Unknown(end_addr, size, bytes_=bytes_, object_=obj, segment=segment, section=section)
394                                 )
395                addr = next_addr
396            else:
397                addr = max_addr
398
399
# NOTE(review): imported at the end of the module rather than at the top - presumably to avoid a circular import;
# confirm before moving it.
from angr.analyses import AnalysesHub
# Register the analysis under both its short name and its full name.
AnalysesHub.register_default('CFB', CFBlanket)
AnalysesHub.register_default('CFBlanket', CFBlanket)
403