1
2import logging
3
4from . import Backend, register_backend, Symbol, SymbolType
5from .relocation import Relocation
6from ..errors import CLEError
7from ..address_translator import AT
8import archinfo
9
l = logging.getLogger(name=__name__)

# Message used for the CLEError raised whenever this backend is used without Binary Ninja.
# Built with implicit string concatenation so that source indentation never leaks into the text.
BINJA_NOT_INSTALLED_STR = ("Binary Ninja does not appear to be installed. Please ensure Binary Ninja "
                           "and its Python API are properly installed before using this backend.")

# Binary Ninja is an optional dependency: if the import fails, `bn` is set to None and the rest of
# this module checks `bn` before touching the Binary Ninja API.
try:
    import binaryninja as bn
except ImportError:
    bn = None
    l.info("Unable to import binaryninja module")
19
20
class BinjaSymbol(Symbol):
    """
    A CLE symbol built from a Binary Ninja symbol.

    The three class-level lists group Binary Ninja symbol types by how this class treats them
    (function-like, data-like, import). They are empty when Binary Ninja could not be imported.
    """
    if bn:
        BINJA_FUNC_SYM_TYPES = [bn.SymbolType.ImportedFunctionSymbol,
                                bn.SymbolType.FunctionSymbol,
                                bn.SymbolType.ImportAddressSymbol]

        BINJA_DATA_SYM_TYPES = [bn.SymbolType.ImportedDataSymbol,
                                bn.SymbolType.DataSymbol]

        BINJA_IMPORT_TYPES = [bn.SymbolType.ImportedFunctionSymbol,
                              bn.SymbolType.ImportAddressSymbol,
                              bn.SymbolType.ImportedDataSymbol]
    else:
        BINJA_FUNC_SYM_TYPES = []
        BINJA_DATA_SYM_TYPES = []
        BINJA_IMPORT_TYPES = []

    def __init__(self, owner, sym):
        """
        :param owner:   The backend object this symbol belongs to; its ``bv.address_size`` is used as the size.
        :param sym:     The Binary Ninja symbol to wrap (its ``type``, ``raw_name`` and ``address`` are read).
        :raises CLEError: If Binary Ninja is not available.
        """
        if not bn:
            raise CLEError(BINJA_NOT_INSTALLED_STR)

        binja_type = sym.type

        # Translate the Binary Ninja symbol type into the CLE symbol type.
        if binja_type in self.BINJA_FUNC_SYM_TYPES:
            cle_type = SymbolType.TYPE_FUNCTION
        elif binja_type in self.BINJA_DATA_SYM_TYPES:
            cle_type = SymbolType.TYPE_OBJECT
        else:
            cle_type = SymbolType.TYPE_OTHER

        raw_name = sym.raw_name
        relative_addr = AT.from_rva(sym.address, owner).to_rva()
        size = owner.bv.address_size
        super().__init__(owner, raw_name, relative_addr, size, cle_type)

        # is_import is only ever switched on here; other symbols keep whatever the base class set.
        if binja_type in self.BINJA_IMPORT_TYPES:
            self.is_import = True

        # TODO: set is_weak appropriately
54
55
class BinjaReloc(Relocation):
    """
    Relocation type used by the Binary Ninja backend for imports.
    """

    @property
    def value(self):
        """
        The value of this relocation, which is simply its ``relative_addr``.
        """
        reloc_value = self.relative_addr
        return reloc_value
61
62
class BinjaBin(Backend):
    """
    Get information from binaries using Binary Ninja. Basing this on idabin.py, but will try to be more complete.
    TODO: add more features as Binary Ninja's feature set improves
    """
    is_default = True  # Tell CLE to automatically consider using the BinjaBin backend

    # Maps Binary Ninja architecture names (``bv.arch.name``) to the archinfo instance handed to set_arch().
    BINJA_ARCH_MAP = {"aarch64": archinfo.ArchAArch64(endness='Iend_LE'),
                      "armv7": archinfo.ArchARMEL(endness='Iend_LE'),
                      "thumb2": archinfo.ArchARMEL(endness='Iend_LE'),
                      "armv7eb": archinfo.ArchARMEL(endness='Iend_BE'),
                      "thumb2eb": archinfo.ArchARMEL(endness='Iend_BE'),
                      "mipsel32": archinfo.ArchMIPS32(endness='Iend_LE'),
                      "mips32": archinfo.ArchMIPS32(endness='Iend_BE'),
                      "ppc": archinfo.ArchPPC32(endness="Iend_BE"),
                      "ppc_le": archinfo.ArchPPC32(endness="Iend_LE"),
                      "x86": archinfo.ArchX86(),
                      "x86_64": archinfo.ArchAMD64()}

    def __init__(self, binary, *args, **kwargs):
        """
        Open `binary` with Binary Ninja, wait for its analysis, then populate memory, symbols and imports.

        :param binary:  Passed to ``BinaryViewType.get_view_of_file`` (a bndb or a binary) and to the base class.
        :raises CLEError: If Binary Ninja is not installed, if it cannot produce a view of `binary`, or if no
                          architecture is set once the Binary Ninja architecture has been looked up.
        """
        super().__init__(binary, *args, **kwargs)
        if not bn:
            raise CLEError(BINJA_NOT_INSTALLED_STR)
        # get_view_of_file can take a bndb or binary - wait for autoanalysis to complete
        self.bv = bn.BinaryViewType.get_view_of_file(binary, False)
        if self.bv is None:
            # Fail with a clear error instead of an AttributeError on None a few lines below.
            raise CLEError("Binary Ninja was unable to open %s" % binary)
        l.info("Analyzing %s, this may take some time...", binary)
        self.bv.update_analysis_and_wait()
        l.info("Analysis complete")
        # Note may want to add option to kick off linear sweep

        arch_name = self.bv.arch.name
        binja_arch = self.BINJA_ARCH_MAP.get(arch_name)
        if binja_arch is None:
            l.error("Architecture %s is not supported.", arch_name)
        else:
            self.set_arch(binja_arch)
        # _find_got() dereferences self.arch, so stop here if nothing set one.
        if getattr(self, "arch", None) is None:
            raise CLEError("Architecture %s is not supported." % arch_name)

        # One reader is enough: it is repositioned with seek() for every segment.
        br = bn.BinaryReader(self.bv)
        for seg in self.bv.segments:
            l.info("Adding memory for segment at %x.", seg.start)
            br.seek(seg.start)
            data = br.read(len(seg))
            self.memory.add_backer(seg.start, data)

        self._find_got()
        self._symbol_cache = {}
        self._init_symbol_cache()
        # Note: this represents the plt stub. ImportAddressSymbol refers to .got entries
        # Since we're not trying to import and load dependencies directly, but want to run SimProcedures,
        # We should use the binaryninja.SymbolType.ImportedFunctionSymbol
        # Also this should be generalized to get data imports, too
        # Keyed by raw_name because that is the key used by _symbol_cache, which _process_imports() indexes.
        self.raw_imports = {i.raw_name: i.address
                            for i in self.bv.get_symbols_of_type(bn.SymbolType.ImportedFunctionSymbol)}
        self._process_imports()
        self.exports = {}
        self.linking = "dynamic" if self.raw_imports else "static"
        # We'll look for this attribute to see if we need to do SimProcedures for any imports in this binary
        # This is an ugly hack, but will have to use this for now until Binary Ninja exposes dependencies
        self.guess_simprocs = True
        self.guess_simprocs_hint = "nix" if self.bv.get_section_by_name(".plt") else "win"
        l.warning("This backend is based on idabin.py.\n\
                   You may encounter unexpected behavior if:\n\
                   \tyour target depends on library data symbol imports, or\n\
                   \tlibrary imports that don't have a guess-able SimProcedure\n\
                   Good luck!")

    def _process_imports(self):
        ''' Process self.raw_imports into list of Relocation objects '''
        if not self.raw_imports:
            l.warning("No imports found - if this is a dynamically-linked binary, something probably went wrong.")

        # The BinjaReloc objects are not stored here; they are created for whatever the
        # Relocation constructor does with them.
        for name, addr in self.raw_imports.items():
            BinjaReloc(self, self._symbol_cache[name], addr)

    def _init_symbol_cache(self):
        """
        Wrap every Binary Ninja symbol in a BinjaSymbol, index it by raw name in ``_symbol_cache`` and add it to
        ``self.symbols``. If several symbols share a raw name, the last one seen stays in the cache.
        """
        # Note that we could also access name, short_name, or full_name attributes
        for sym in self.bv.get_symbols():
            cle_sym = BinjaSymbol(self, sym)
            self._symbol_cache[sym.raw_name] = cle_sym
            self.symbols.add(cle_sym)

    def _find_got(self):
        """
        Locate the section (e.g. .got) that should be updated when relocating functions (that's where we want to
        write absolute addresses).

        Sets ``got_begin`` and ``got_end`` to the section bounds, or leaves both as None if it is missing.

        :returns: True if the section was found, False otherwise.
        """
        sec_name = self.arch.got_section_name
        self.got_begin = None
        self.got_end = None

        try:
            got_sec = self.bv.sections[sec_name]
        except KeyError:
            l.warning("No got section mapping found!")
            l.warning("No section %s, is this a static binary ? (or stripped)", sec_name)
            return False

        self.got_begin = got_sec.start
        self.got_end = got_sec.end
        return True

    @staticmethod
    def is_compatible(stream):
        """
        Tell whether `stream` looks like a Binary Ninja database: SQLite 3 magic and a name ending in "bndb".

        The stream is left positioned at offset 0.

        :param stream:  A seekable binary stream.
        :returns:       True if this backend should handle the stream, False otherwise (including when Binary
                        Ninja is not installed or the stream has no string name).
        """
        if not bn:
            return False
        # Read the magic from the very start, regardless of where the caller left the stream.
        stream.seek(0)
        magic = stream.read(100)
        stream.seek(0)
        # In-memory streams have no name and some file objects have a non-str one; neither can be a bndb path.
        name = getattr(stream, "name", None)
        if not isinstance(name, str):
            return False
        # bndb files are SQlite 3
        return magic.startswith(b"SQLite format 3") and name.endswith("bndb")

    def in_which_segment(self, addr):
        """
        Return the segment name at address `addr`.

        :returns: The name of the first section Binary Ninja reports at `addr`, or "unknown" if there is no
                  section there or its name is empty.
        """
        # WARNING: if there are overlapping sections, we choose the first name.
        # The only scenario I've seen here is a NOBITS section that "overlaps" with another one, but
        # I'm not sure if that's a heuristic that should be applied here.
        # https://stackoverflow.com/questions/25501044/gcc-ld-overlapping-sections-tbss-init-array-in-statically-linked-elf-bin#25771838
        sections = self.bv.get_sections_at(addr)
        if not sections:
            return "unknown"
        seg = sections[0].name
        return "unknown" if len(seg) == 0 else seg

    def get_symbol_addr(self, sym):
        """
        Get the address of the symbol `sym` from Binary Ninja.

        :param sym: The raw name of the symbol.
        :returns:   An address, or None if Binary Ninja has no symbol with that raw name.
        """
        # sym is assumed to be the raw_name of the symbol
        binja_sym = self.bv.get_symbol_by_raw_name(sym)
        if binja_sym is None:
            return None
        return binja_sym.address

    def function_name(self, addr):
        """
        Return the function name at address `addr`.

        :returns: The name of the function Binary Ninja has at `addr`, or "UNKNOWN" if there is none.
        """
        func = self.bv.get_function_at(addr)
        if not func:
            return "UNKNOWN"
        return func.name

    @property
    def min_addr(self):
        """
        Get the min address of the binary. (note: this is probably not "right")
        """
        return self.bv.start

    @property
    def max_addr(self):
        """
        Get the max address of the binary.
        """
        return self.bv.end

    @property
    def entry(self):
        """
        The entry point: the custom entry point if one is set, otherwise Binary Ninja's, plus ``mapped_base``.
        """
        if self._custom_entry_point is not None:
            return self._custom_entry_point + self.mapped_base
        return self.bv.entry_point + self.mapped_base

    def get_strings(self):
        """
        Extract strings from binary (Binary Ninja).

        :returns:   Whatever ``bv.get_strings()`` returns.
        """
        return self.bv.get_strings()

    def set_got_entry(self, name, newaddr):
        """
        Resolve import `name` with address `newaddr`. That is, update the GOT entry for `name` with `newaddr`.

        Logs a warning and does nothing if `name` is not in ``self.imports``.
        """
        if name not in self.imports:
            l.warning("%s not in imports", name)
            return

        # NOTE(review): this passes self.imports[name] straight to pack_word as the address - confirm that
        # imports really maps names to addresses for this backend.
        addr = self.imports[name]
        self.memory.pack_word(addr, newaddr)

    def close(self):
        """
        Release the BinaryView we created in __init__
        :return: None
        """
        self.bv.file.close()
249
250
# Register BinjaBin with the backend registry under the name "binja".
register_backend("binja", BinjaBin)
252