import os
import logging
import hashlib
from io import BufferedReader
from typing import List, Optional  # pylint:disable=unused-import

import sortedcontainers

import archinfo
from .region import Region, Segment, Section
from .regions import Regions
from .symbol import Symbol, SymbolType
from ..address_translator import AT
from ..memory import Clemory
from ..errors import CLEOperationError, CLEError

l = logging.getLogger(name=__name__)


class FunctionHintSource:
    """
    Enums that describe the source of function hints.
    """
    EH_FRAME = 0
    EXTERNAL_EH_FRAME = 1


class FunctionHint:
    """
    Describes a function hint.

    :ivar int addr:     Address of the function.
    :ivar int size:     Size of the function.
    :ivar source:       Source of this hint.
    :vartype source:    int
    """

    __slots__ = ('addr', 'size', 'source')

    def __init__(self, addr, size, source):
        self.addr = addr
        self.size = size
        self.source = source

    def __repr__(self):
        return "<FuncHint@%#x, %d bytes>" % (self.addr, self.size)


class ExceptionHandling:
    """
    Describes an exception handling.

    Exception handlers are usually language-specific. In C++, it is usually implemented as try {} catch {} blocks.

    :ivar int start_addr:               The beginning of the try block.
    :ivar int size:                     Size of the try block.
    :ivar Optional[int] handler_addr:   Address of the exception handler code.
    :ivar type:                         Type of the exception handler. Optional.
    :ivar Optional[int] func_addr:      Address of the function. Optional.
    """

    __slots__ = ('start_addr', 'size', 'handler_addr', 'type', 'func_addr',)

    def __init__(self, start_addr, size, handler_addr=None, type_=None, func_addr=None):
        self.start_addr = start_addr
        self.size = size
        self.handler_addr = handler_addr
        self.type = type_
        self.func_addr = func_addr

    def __repr__(self):
        end_addr = self.start_addr + self.size
        if self.handler_addr is None:
            return "<ExceptionHandling@%#x-%#x: no handler>" % (self.start_addr, end_addr)
        return "<ExceptionHandling@%#x-%#x: handler@%#x>" % (self.start_addr, end_addr, self.handler_addr)


class Backend:
    """
    Main base class for CLE binary objects.

    An alternate interface to this constructor exists as the static method :meth:`cle.loader.Loader.load_object`

    :ivar binary:           The path to the file this object is loaded from
    :ivar binary_basename:  The basename of the filepath, or a short representation of the stream it was loaded from
    :ivar is_main_bin:      Whether this binary is loaded as the main executable
    :ivar segments:         A listing of all the loaded segments in this file
    :ivar sections:         A listing of all the demarked sections in the file
    :ivar sections_map:     A dict mapping from section name to section
    :ivar imports:          A mapping from symbol name to import relocation
    :ivar resolved_imports: A list of all the import symbols that are successfully resolved
    :ivar relocs:           A list of all the relocations in this binary
    :ivar irelatives:       A list of tuples representing all the irelative relocations that need to be performed. The
                            first item in the tuple is the address of the resolver function, and the second item is the
                            address of where to write the result. The destination address is an RVA.
    :ivar jmprel:           A mapping from symbol name to the address of its jump slot relocation, i.e. its GOT entry.
    :ivar arch:             The architecture of this binary
    :vartype arch:          archinfo.arch.Arch
    :ivar str os:           The operating system this binary is meant to run under
    :ivar int mapped_base:  The base address of this object in virtual memory
    :ivar deps:             A list of names of shared libraries this binary depends on
    :ivar linking:          'dynamic' or 'static'
    :ivar linked_base:      The base address this object requests to be loaded at
    :ivar bool pic:         Whether this object is position-independent
    :ivar bool execstack:   Whether this executable has an executable stack
    :ivar str provides:     The name of the shared library dependency that this object resolves
    :ivar list symbols:     A list of symbols provided by this object, sorted by address
    :ivar has_memory:       Whether this backend is backed by a Clemory or not. As it stands now, a backend should still
                            define `min_addr` and `max_addr` even if `has_memory` is False.
    """
    is_default = False

    def __init__(self,
                 binary,
                 binary_stream,
                 loader=None,
                 is_main_bin=False,
                 entry_point=None,
                 arch=None,
                 base_addr=None,
                 force_rebase=False,
                 has_memory=True,
                 **kwargs):
        """
        :param binary:          The path to the binary to load
        :param binary_stream:   The open stream to this binary. The reference to this will be held until you call close.
        :param loader:          Stored as ``self.loader``.
        :param is_main_bin:     Whether this binary should be loaded as the main executable
        :param entry_point:     Custom entry point; when not None it takes precedence over the detected one in
                                :attr:`entry`.
        :param arch:            None, an architecture identifier string (resolved with ``archinfo.arch_from_id``), an
                                ``archinfo.Arch`` instance, or an ``archinfo.Arch`` subclass (instantiated with no
                                arguments).
        :param base_addr:       Custom base address, stored as ``self._custom_base_addr``.
        :param force_rebase:    Initial value of ``self.pic``.
        :param has_memory:      Stored as ``self.has_memory``.
        :param kwargs:          The deprecated aliases ``custom_entry_point``, ``custom_arch`` and ``custom_base_addr``
                                are honored (with a logged deprecation message). Anything else is reported as unused.
        :raises CLEError:       If ``arch`` is of none of the supported kinds.
        """
        self.binary = binary
        self._binary_stream: BufferedReader = binary_stream
        if self.binary is not None:
            self.binary_basename = os.path.basename(self.binary)
        elif hasattr(self._binary_stream, "name"):
            self.binary_basename = os.path.basename(self._binary_stream.name)
        else:
            self.binary_basename = str(self._binary_stream)

        # Iterate over a snapshot of the keys because matching entries are popped inside the loop.
        for k in list(kwargs):
            if k == 'custom_entry_point':
                entry_point = kwargs.pop(k)
            elif k == 'custom_arch':
                arch = kwargs.pop(k)
            elif k == 'custom_base_addr':
                base_addr = kwargs.pop(k)
            else:
                continue
            # k[7:] drops the leading "custom_" to yield the new parameter name
            l.critical("Deprecation warning: the %s parameter has been renamed to %s", k, k[7:])

        if kwargs:
            l.warning("Unused kwargs for loading binary %s: %s", self.binary, ', '.join(kwargs))

        self.is_main_bin = is_main_bin
        self.has_memory = has_memory
        self.loader = loader
        self._entry = 0
        self._segments = Regions()  # List of segments
        self._sections = Regions()  # List of sections
        self.sections_map = {}  # Mapping from section name to section
        self.symbols: 'sortedcontainers.SortedKeyList[Symbol]' = \
            sortedcontainers.SortedKeyList(key=self._get_symbol_relative_addr)
        self.imports = {}
        self.resolved_imports = []
        self.relocs = []
        self.irelatives = []  # list of tuples (resolver, destination), dest w/o rebase
        self.jmprel = {}
        self.arch = None
        self.os = None  # Let other stuff override this
        self.compiler = None, None  # compiler name, version
        self._symbol_cache = {}
        # a list of directories to search for libraries specified by the object
        self.extra_load_path = []
        # attributes to enable SimProcedure guessing
        self.guess_simprocs = False
        self.guess_simprocs_hint = None

        # checksums
        self.md5 = None
        self.sha256 = None

        self.mapped_base_symbolic = 0
        # These are set by cle, and should not be overridden manually
        self.mapped_base = self.linked_base = 0  # not to be set manually - used by CLE

        self.deps = []  # Needed shared objects (libraries dependencies)
        self.child_objects = []  # any objects loaded directly out of this
        self.parent_object = None
        self.linking = None  # Dynamic or static linking
        self.pic = force_rebase
        self.execstack = False

        # tls info set by backend to communicate with thread manager
        self.tls_used = False
        self.tls_block_size = None
        self.tls_data_size = None
        self.tls_data_start = None
        # tls info set by thread manager
        self.tls_module_id = None
        #self.tls_block_offset = None # this is an ELF-only attribute

        # exception handling
        # they should be rebased when .rebase() is called
        self.exception_handlings = []  # type: List[ExceptionHandling]

        # Hints
        # they should be rebased when .rebase() is called
        self.function_hints = []  # type: List[FunctionHint]

        # Custom options
        self._custom_entry_point = entry_point
        self._custom_base_addr = base_addr
        self.provides = os.path.basename(self.binary) if self.binary is not None else None

        self.memory = None  # type: Clemory

        # should be set inside `cle.Loader.add_object`
        self._is_mapped = False
        # cached max_addr
        self._max_addr = None
        # cached last section
        self._last_section = None
        # cached last segment
        self._last_segment = None

        if arch is None:
            self.arch = None
        elif isinstance(arch, str):
            self.set_arch(archinfo.arch_from_id(arch))
        elif isinstance(arch, archinfo.Arch):
            self.set_arch(arch)
        elif isinstance(arch, type) and issubclass(arch, archinfo.Arch):
            self.set_arch(arch())
        else:
            raise CLEError("Bad parameter: arch=%s" % arch)

        self._checksum()

    def close(self):
        """
        Drop this object's reference to the binary stream.

        The stream itself is not closed here; only the ``_binary_stream`` attribute is deleted, so accessing it (or
        calling ``close`` a second time) afterwards raises ``AttributeError``.
        """
        del self._binary_stream

    def __repr__(self):
        return '<%s Object %s, maps [%#x:%#x]>' % \
               (self.__class__.__name__, self.binary_basename, self.min_addr, self.max_addr)

    def set_arch(self, arch):
        """
        Set the architecture of this object and create a fresh ``Clemory`` for it, replacing ``self.memory``.
        """
        self.arch = arch
        self.memory = Clemory(arch)  # Private virtual address space, without relocations

    @property
    def image_base_delta(self):
        """
        The difference between the base this object is mapped at and the base it requested (``mapped_base -
        linked_base``).
        """
        return self.mapped_base - self.linked_base

    @property
    def entry(self):
        """
        The entry point of this object, translated with ``AT.from_lva(...).to_mva()``. A custom entry point passed to
        the constructor takes precedence over ``self._entry``.
        """
        if self._custom_entry_point is not None:
            return AT.from_lva(self._custom_entry_point, self).to_mva()
        return AT.from_lva(self._entry, self).to_mva()

    @property
    def segments(self):
        """
        The segments of this object, as a ``Regions`` container.
        """
        return self._segments

    @segments.setter
    def segments(self, v):
        # A plain list is wrapped into a Regions container; a Regions instance is stored as-is.
        if isinstance(v, list):
            self._segments = Regions(lst=v)
        elif isinstance(v, Regions):
            self._segments = v
        else:
            raise ValueError('Unsupported type %s set as segments.' % type(v))

    @property
    def sections(self):
        """
        The sections of this object, as a ``Regions`` container.
        """
        return self._sections

    @sections.setter
    def sections(self, v):
        # A plain list is wrapped into a Regions container; a Regions instance is stored as-is.
        if isinstance(v, list):
            self._sections = Regions(lst=v)
        elif isinstance(v, Regions):
            self._sections = v
        else:
            raise ValueError('Unsupported type %s set as sections.' % type(v))

    @property
    def symbols_by_addr(self):
        """
        Deprecated. Builds a new dict mapping each symbol's ``rebased_addr`` to the symbol on every access.
        """
        l.critical("Deprecation warning: symbols_by_addr is deprecated - use loader.find_symbol() for lookup and .symbols for enumeration")
        return {s.rebased_addr: s for s in self.symbols}

    def rebase(self, new_base):
        """
        Rebase backend's regions to the new base where they were mapped by the loader

        Sections, segments, exception handling records and function hints are all shifted by
        :attr:`image_base_delta`.

        :param new_base:            The address this object is mapped at; stored as ``self.mapped_base``.
        :raises CLEOperationError:  If ``self._is_mapped`` is already set. This method does not set that flag itself.
        """
        if self._is_mapped:
            # we could rebase an object twice if we really wanted... no need though, right?
            raise CLEOperationError("Image already rebased from %#x to %#x" % (self.linked_base, self.mapped_base))

        self.mapped_base = new_base
        # Must be read after mapped_base is updated; it is constant for the rest of this method.
        delta = self.image_base_delta

        if self.sections:
            self.sections._rebase(delta)
        # Skip the segments when they are the very same container as the sections, to avoid shifting it twice.
        if self.segments and self.sections is not self.segments:
            self.segments._rebase(delta)

        for handling in self.exception_handlings:
            if handling.func_addr is not None:
                handling.func_addr += delta
            if handling.handler_addr is not None:
                handling.handler_addr += delta
            handling.start_addr += delta

        for hint in self.function_hints:
            hint.addr = hint.addr + delta

    def relocate(self):
        """
        Apply all resolved relocations to memory.

        The meaning of "resolved relocations" is somewhat subtle - there is a linking step which attempts to resolve
        each relocation, currently only present in the main internal loading function since the calculation of which
        objects should be available
        """
        for reloc in self.relocs:
            if reloc.resolved:
                reloc.relocate()

    def contains_addr(self, addr):
        """
        Is `addr` in one of the binary's segments/sections we have loaded? (i.e. is it mapped into memory ?)
        """
        return self.find_loadable_containing(addr) is not None

    def find_loadable_containing(self, addr):
        """
        Returns the segment containing `addr` if this object has any segments, otherwise the section containing
        `addr`. Returns ``None`` if nothing contains it.
        """
        lookup = self.find_segment_containing if self.segments else self.find_section_containing
        return lookup(addr)

    def find_segment_containing(self, addr):
        """
        Returns the segment that contains `addr`, or ``None``.
        """
        # Fast path: try the segment returned by the previous successful lookup first.
        if self._last_segment is not None and self._last_segment.contains_addr(addr):
            return self._last_segment

        r = self.segments.find_region_containing(addr)
        if r is not None:
            self._last_segment = r
        return r

    def find_section_containing(self, addr):
        """
        Returns the section that contains `addr` or ``None``.
        """
        # Fast path: try the section returned by the previous successful lookup first.
        if self._last_section is not None and self._last_section.contains_addr(addr):
            return self._last_section

        r = self.sections.find_region_containing(addr)
        if r is not None:
            self._last_section = r
        return r

    def addr_to_offset(self, addr):
        """
        Translate `addr` with the ``addr_to_offset`` method of the segment/section that contains it.

        :return: The translated offset, or ``None`` if no loadable region contains `addr`.
        """
        loadable = self.find_loadable_containing(addr)
        if loadable is None:
            return None
        return loadable.addr_to_offset(addr)

    def offset_to_addr(self, offset):
        """
        Translate `offset` with the ``offset_to_addr`` method of the first region that contains it. Segments are
        searched if this object has any, otherwise sections.

        :return: The translated address, or ``None`` if no region contains `offset`.
        """
        for region in (self.segments or self.sections):
            if region.contains_offset(offset):
                return region.offset_to_addr(offset)
        return None

    @property
    def min_addr(self):
        """
        This returns the lowest virtual address contained in any loaded segment of the binary.
        """
        # Loader maps the object at chosen mapped base anyway and independently of the internal structure
        return self.mapped_base

    @property
    def max_addr(self):
        """
        This returns the highest virtual address contained in any loaded segment of the binary.
        """
        if self._max_addr is None:
            out = self.mapped_base
            if self.segments or self.sections:
                out = max(region.max_addr for region in (self.segments or self.sections))
            # The cached value is stored relative to mapped_base
            self._max_addr = out - self.mapped_base
        return self._max_addr + self.mapped_base

    @property
    def initializers(self):  # pylint: disable=no-self-use
        """
        Stub function. Should be overridden by backends that can provide initializer functions that ought to be run
        before execution reaches the entry point. Addresses should be rebased.
        """
        return []

    @property
    def finalizers(self):  # pylint: disable=no-self-use
        """
        Stub function. Like initializers, but with finalizers.
        """
        return []

    @property
    def threads(self):  # pylint: disable=no-self-use
        """
        If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e.
        register files. This property should contain a list of names for these threads, which should be unique.
        """
        return []

    def thread_registers(self, thread=None):  # pylint: disable=no-self-use,unused-argument
        """
        If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e.
        register files. This method should return the register file for a given thread (as named in ``Backend.threads``)
        as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should
        return the context for a "default" thread. If there are no threads, it should return an empty dict.
        """
        return {}

    def initial_register_values(self):
        """
        Deprecated
        """
        l.critical("Deprecation warning: initial_register_values is deprecated - use backend.thread_registers() instead")
        return self.thread_registers().items()

    def get_symbol(self, name):  # pylint: disable=no-self-use,unused-argument
        """
        Stub function. Implement to find the symbol with name `name`.

        The base implementation only consults ``self._symbol_cache`` and returns ``None`` on a miss.
        """
        return self._symbol_cache.get(name)

    @staticmethod
    def extract_soname(path):  # pylint: disable=unused-argument
        """
        Extracts the shared object identifier from the path, or returns None if it cannot.
        """
        return None

    @classmethod
    def is_compatible(cls, stream):  # pylint:disable=unused-argument
        """
        Determine quickly whether this backend can load an object from this stream
        """
        return False

    @classmethod
    def check_compatibility(cls, spec, obj):  # pylint: disable=unused-argument
        """
        Performs a minimal static load of ``spec`` and returns whether it's compatible with ``obj``
        """
        return False

    @classmethod
    def check_magic_compatibility(cls, stream):  # pylint: disable=unused-argument
        """
        Check if a stream of bytes contains the same magic number as the main object
        """
        return False

    @staticmethod
    def _get_symbol_relative_addr(symbol):
        # Sort key for self.symbols
        return symbol.relative_addr

    def _checksum(self):
        """
        Calculate MD5 and SHA256 checksum for the binary.

        Reads the remainder of the binary stream from its current position, then seeks the stream back to offset 0.
        Does nothing when there is no stream.
        """
        if self._binary_stream is None:
            return

        data = self._binary_stream.read()
        self._binary_stream.seek(0)
        self.md5 = hashlib.md5(data).digest()
        self.sha256 = hashlib.sha256(data).digest()

    def __getstate__(self):
        # Note: this hands out the live instance dict, not a copy.
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Re-point every symbol at this (restored) instance
        for sym in self.symbols:
            sym.owner = self


ALL_BACKENDS = {}


def register_backend(name, cls):
    """
    Register the backend class `cls` under `name` in ``ALL_BACKENDS``, replacing any previous entry with that name.
    """
    ALL_BACKENDS[name] = cls


# These imports must stay at the bottom of the module.
# NOTE(review): presumably because the backend modules import names defined above (circular import) - confirm.
from .elf import ELF, ELFCore, MetaELF
from .pe import PE
#from .idabin import IDABin
from .blob import Blob
from .cgc import CGC, BackedCGC
from .ihex import Hex
from .minidump import Minidump
from .macho import MachO
from .named_region import NamedRegion
from .java.jar import Jar
from .java.apk import Apk
from .java.soot import Soot
from .xbe import XBE
from .static_archive import StaticArchive

try:
    from .binja import BinjaBin
except Exception:  # pylint:disable=broad-except
    l.warning("Binary Ninja is installed in the environment but the BinjaBin backend fails to initialize. Your Binary "
              "Ninja might be too old.",
              exc_info=True)