#===- object.py - Python Object Bindings --------------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

r"""
Object File Interface
=====================

This module provides an interface for reading information from object files
(e.g. binary executables and libraries).

Using this module, you can obtain information about an object file's sections,
symbols, and relocations. These are represented by the classes ObjectFile,
Section, Symbol, and Relocation, respectively.

Usage
-----

The only way to use this module is to start by creating an ObjectFile. You can
create an ObjectFile by loading a file (specified by its path) or by creating a
llvm.core.MemoryBuffer and loading that.

Once you have an object file, you can inspect its sections and symbols directly
by calling get_sections() and get_symbols() respectively. To inspect
relocations, call get_relocations() on a Section instance.

Iterator Interface
------------------

The LLVM bindings expose iteration over sections, symbols, and relocations in a
way that only allows one instance to be operated on at a single time. This is
slightly annoying from a Python perspective, as it isn't very Pythonic to have
objects that "expire" but are still active from a dynamic language.

To aid working around this limitation, each Section, Symbol, and Relocation
instance caches its properties after first access. So, if the underlying
iterator is advanced, the properties can still be obtained provided they have
already been retrieved.

In addition, we also provide a "cache" method on each class to cache all
available data. You can call this on each obtained instance. Or, you can pass
cache=True to the appropriate get_XXX() method to have this done for you.

Here are some examples on how to perform iteration:

    obj = ObjectFile(filename='/bin/ls')

    # This is OK. Each Section is only accessed inside its own iteration slot.
    section_names = []
    for section in obj.get_sections():
        section_names.append(section.name)

    # This is NOT OK. You perform a lookup after the object has expired.
    symbols = list(obj.get_symbols())
    for symbol in symbols:
        print(symbol.name) # This raises because the object has expired.

    # In this example, we mix a working and failing scenario.
    symbols = []
    for symbol in obj.get_symbols():
        symbols.append(symbol)
        print(symbol.name)

    for symbol in symbols:
        print(symbol.name) # OK
        print(symbol.address) # NOT OK. We didn't look up this property before.

    # Cache everything up front.
    symbols = list(obj.get_symbols(cache=True))
    for symbol in symbols:
        print(symbol.name) # OK

"""

from ctypes import c_char_p
from ctypes import c_char
from ctypes import POINTER
from ctypes import c_uint64
from ctypes import string_at

from .common import CachedProperty
from .common import LLVMObject
from .common import c_object_p
from .common import get_library
from .core import MemoryBuffer

__all__ = [
    "lib",
    "ObjectFile",
    "Relocation",
    "Section",
    "Symbol",
]

class ObjectFile(LLVMObject):
    """Represents an object/binary file."""

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or a memory buffer.

        filename must be a path to a file; it is loaded through
        llvm.core.MemoryBuffer.
        contents must be a llvm.core.MemoryBuffer instance.

        If both are given, filename takes precedence. If neither is given,
        an Exception is raised.
        """
        if contents:
            assert isinstance(contents, MemoryBuffer)

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        # NOTE(review): the result is not checked for NULL here. Confirm how
        # LLVMCreateObjectFile reports failure (and who owns the buffer in that
        # case) before adding an error path.
        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        # NOTE(review): presumably hands responsibility for the buffer over to
        # this object -- confirm against LLVMObject.take_ownership.
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances. If cache is
        true, Section.cache() is called on each instance before it is yielded.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections, self)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # Runs on exhaustion, on an exception, and when the consumer
            # abandons the generator early, so the native iterator is always
            # released and the last yielded instance is always expired.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances. If cache is
        true, Symbol.cache() is called on each instance before it is yielded.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # See get_sections(): always expire the last instance and release
            # the native iterator, even if iteration stops early.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)

class Section(LLVMObject):
    """Represents a section in an object file."""

    def __init__(self, ptr, object_file=None):
        """Construct a new section instance.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.

        object_file is the optional ObjectFile this section came from. It is
        passed on to Relocation instances created by get_relocations().
        """
        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    def _check_expired(self):
        """Raise if this instance has been marked as expired."""
        if self.expired:
            raise Exception('Section instance has expired.')

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.
        """
        self._check_expired()

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section, in bytes."""
        self._check_expired()

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The raw contents of the section.

        Exactly `size` bytes are copied from the pointer returned by LLVM.
        None is returned if LLVM returns a NULL pointer.
        """
        self._check_expired()

        siz = self.size

        r = lib.LLVMGetSectionContents(self)
        if r:
            # The data is not NUL-terminated, so copy an explicit length.
            return string_at(r, siz)
        return None

    @CachedProperty
    def address(self):
        """The address of this section."""
        self._check_expired()

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section."""
        self._check_expired()

        assert isinstance(symbol, Symbol)
        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances. If cache is
        true, Relocation.cache() is called on each instance before it is
        yielded.

        Each instance is a limited-use object. See this module's documentation
        on iterators for more.
        """
        self._check_expired()

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations, self._object_file)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Always expire the last instance and release the native iterator,
            # even if the consumer stops iterating early.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        getattr(self, 'name')
        getattr(self, 'size')
        getattr(self, 'contents')
        getattr(self, 'address')

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True

class Symbol(LLVMObject):
    """Represents a symbol in an object file."""
    def __init__(self, ptr, object_file=None):
        """Create a new symbol instance.

        ptr is the native symbol iterator this instance reads from.
        object_file is the ObjectFile the symbol belongs to. It may be omitted,
        in which case the `section` property is unavailable.
        """
        assert isinstance(ptr, c_object_p)
        assert object_file is None or isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    def _check_expired(self):
        """Raise if this instance has been marked as expired."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.
        """
        self._check_expired()

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol."""
        self._check_expired()

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def size(self):
        """The size of the symbol, in bytes."""
        self._check_expired()

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        Because this obtains a new section iterator each time it is accessed,
        calling this on a number of Symbol instances could be expensive.

        Raises an Exception if this Symbol has expired or is not associated
        with an ObjectFile.
        """
        # Once the symbol iterator has advanced, the lookup would no longer
        # describe this symbol.
        self._check_expired()

        if self._object_file is None:
            raise Exception('Symbol is not associated with an ObjectFile.')

        # This iterator is intentionally not disposed here: the returned
        # Section keeps using it.
        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        return Section(sections, self._object_file)

    def cache(self):
        """Cache all cacheable properties."""
        getattr(self, 'name')
        getattr(self, 'address')
        getattr(self, 'size')

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True

class Relocation(LLVMObject):
    """Represents a relocation definition."""
    def __init__(self, ptr, object_file=None):
        """Create a new relocation instance.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.

        object_file is the optional ObjectFile this relocation came from. It is
        passed on to the Symbol returned by the `symbol` property.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    def _check_expired(self):
        """Raise if this instance has been marked as expired."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

    @CachedProperty
    def offset(self):
        """The offset of this relocation."""
        self._check_expired()

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation."""
        self._check_expired()

        # NOTE(review): the iterator returned here is wrapped without a
        # disposer -- confirm whether LLVM expects the caller to release it.
        ptr = lib.LLVMGetRelocationSymbol(self)
        return Symbol(ptr, self._object_file)

    @CachedProperty
    def type_number(self):
        """The relocation type, as a number."""
        self._check_expired()

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str."""
        self._check_expired()

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The relocation's value string, as returned by LLVM."""
        self._check_expired()

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance."""
        # Only properties actually defined on this class are listed.
        getattr(self, 'offset')
        getattr(self, 'symbol')
        getattr(self, 'type_number')
        getattr(self, 'type_name')
        getattr(self, 'value_string')

def register_library(library):
    """Register function prototypes with LLVM library instance.

    Sets ctypes argtypes/restype for every Object.h function used by this
    module on the given library handle.
    """

    # Object.h functions
    library.LLVMCreateObjectFile.argtypes = [MemoryBuffer]
    library.LLVMCreateObjectFile.restype = c_object_p

    library.LLVMDisposeObjectFile.argtypes = [ObjectFile]

    library.LLVMGetSections.argtypes = [ObjectFile]
    library.LLVMGetSections.restype = c_object_p

    library.LLVMDisposeSectionIterator.argtypes = [c_object_p]

    library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
    library.LLVMIsSectionIteratorAtEnd.restype = bool

    library.LLVMMoveToNextSection.argtypes = [c_object_p]

    library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p]

    library.LLVMGetSymbols.argtypes = [ObjectFile]
    library.LLVMGetSymbols.restype = c_object_p

    library.LLVMDisposeSymbolIterator.argtypes = [c_object_p]

    library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
    library.LLVMIsSymbolIteratorAtEnd.restype = bool

    library.LLVMMoveToNextSymbol.argtypes = [c_object_p]

    library.LLVMGetSectionName.argtypes = [c_object_p]
    library.LLVMGetSectionName.restype = c_char_p

    library.LLVMGetSectionSize.argtypes = [c_object_p]
    library.LLVMGetSectionSize.restype = c_uint64

    library.LLVMGetSectionContents.argtypes = [c_object_p]
    # Can't use c_char_p here as it isn't a NUL-terminated string.
    library.LLVMGetSectionContents.restype = POINTER(c_char)

    library.LLVMGetSectionAddress.argtypes = [c_object_p]
    library.LLVMGetSectionAddress.restype = c_uint64

    library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p]
    library.LLVMGetSectionContainsSymbol.restype = bool

    library.LLVMGetRelocations.argtypes = [c_object_p]
    library.LLVMGetRelocations.restype = c_object_p

    library.LLVMDisposeRelocationIterator.argtypes = [c_object_p]

    library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p]
    library.LLVMIsRelocationIteratorAtEnd.restype = bool

    library.LLVMMoveToNextRelocation.argtypes = [c_object_p]

    library.LLVMGetSymbolName.argtypes = [Symbol]
    library.LLVMGetSymbolName.restype = c_char_p

    library.LLVMGetSymbolAddress.argtypes = [Symbol]
    library.LLVMGetSymbolAddress.restype = c_uint64

    library.LLVMGetSymbolSize.argtypes = [Symbol]
    library.LLVMGetSymbolSize.restype = c_uint64

    library.LLVMGetRelocationOffset.argtypes = [c_object_p]
    library.LLVMGetRelocationOffset.restype = c_uint64

    library.LLVMGetRelocationSymbol.argtypes = [c_object_p]
    library.LLVMGetRelocationSymbol.restype = c_object_p

    library.LLVMGetRelocationType.argtypes = [c_object_p]
    library.LLVMGetRelocationType.restype = c_uint64

    # NOTE(review): with a c_char_p restype ctypes copies the string and drops
    # the original pointer. If LLVM expects the caller to free the strings
    # returned by the two functions below, they are leaked -- confirm against
    # the Object.h documentation.
    library.LLVMGetRelocationTypeName.argtypes = [c_object_p]
    library.LLVMGetRelocationTypeName.restype = c_char_p

    library.LLVMGetRelocationValueString.argtypes = [c_object_p]
    library.LLVMGetRelocationValueString.restype = c_char_p

lib = get_library()
register_library(lib)