1#===- object.py - Python Object Bindings --------------------*- python -*--===#
2#
3# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4# See https://llvm.org/LICENSE.txt for license information.
5# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6#
7#===------------------------------------------------------------------------===#
8
9r"""
10Object File Interface
11=====================
12
13This module provides an interface for reading information from object files
14(e.g. binary executables and libraries).
15
16Using this module, you can obtain information about an object file's sections,
17symbols, and relocations. These are represented by the classes ObjectFile,
18Section, Symbol, and Relocation, respectively.
19
20Usage
21-----
22
The only way to use this module is to start by creating an ObjectFile. You can
create an ObjectFile by loading a file (specified by its path) or by creating
an llvm.core.MemoryBuffer and loading that.
26
27Once you have an object file, you can inspect its sections and symbols directly
28by calling get_sections() and get_symbols() respectively. To inspect
29relocations, call get_relocations() on a Section instance.
30
31Iterator Interface
32------------------
33
The LLVM bindings expose iteration over sections, symbols, and relocations in a
way that only allows one instance to be operated on at a time. This is
slightly annoying from a Python perspective, as it isn't very Pythonic to have
objects that "expire" while they are still referenced from Python code.
38
39To aid working around this limitation, each Section, Symbol, and Relocation
40instance caches its properties after first access. So, if the underlying
41iterator is advanced, the properties can still be obtained provided they have
42already been retrieved.
43
44In addition, we also provide a "cache" method on each class to cache all
45available data. You can call this on each obtained instance. Or, you can pass
46cache=True to the appropriate get_XXX() method to have this done for you.
47
48Here are some examples on how to perform iteration:
49
50    obj = ObjectFile(filename='/bin/ls')
51
52    # This is OK. Each Section is only accessed inside its own iteration slot.
53    section_names = []
54    for section in obj.get_sections():
55        section_names.append(section.name)
56
    # This is NOT OK. You perform a lookup after the object has expired.
    symbols = list(obj.get_symbols())
    for symbol in symbols:
        print(symbol.name) # This raises because the object has expired.
61
    # In this example, we mix a working and failing scenario.
    symbols = []
    for symbol in obj.get_symbols():
        symbols.append(symbol)
        print(symbol.name)

    for symbol in symbols:
        print(symbol.name) # OK
        print(symbol.address) # NOT OK. We didn't look up this property before.
71
    # Cache everything up front.
    symbols = list(obj.get_symbols(cache=True))
    for symbol in symbols:
        print(symbol.name) # OK
76
77"""
78
79from ctypes import c_char_p
80from ctypes import c_char
81from ctypes import POINTER
82from ctypes import c_uint64
83from ctypes import string_at
84
85from .common import CachedProperty
86from .common import LLVMObject
87from .common import c_object_p
88from .common import get_library
89from .core import MemoryBuffer
90
# Names exported by a star-import of this module.
__all__ = [
    "lib",
    "ObjectFile",
    "Relocation",
    "Section",
    "Symbol",
]
98
class ObjectFile(LLVMObject):
    """Represents an object/binary file."""

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or a MemoryBuffer.

        filename, when given, is the path of the file to load. It is wrapped
        in a llvm.core.MemoryBuffer and takes precedence over contents.

        contents, when given, must be a llvm.core.MemoryBuffer instance.

        Raises TypeError if contents is not a MemoryBuffer and Exception if
        neither argument supplies any input.
        """
        # Validate explicitly rather than with assert so the check survives
        # running Python with -O.
        if contents is not None and not isinstance(contents, MemoryBuffer):
            raise TypeError('contents must be a MemoryBuffer instance.')

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        # NOTE(review): the returned pointer is not checked for NULL here;
        # presumably the library returns NULL for input it cannot parse --
        # confirm against the LLVM-C API before relying on this.
        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances. When cache is
        true, cache() is called on each Section before it is yielded.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more.

        The underlying section iterator is disposed when the generator
        finishes, is closed early, or fails; the most recently yielded
        Section is expired at that point.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections)
                if cache:
                    last.cache()

                yield last

                # Advancing the iterator invalidates the Section just yielded.
                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # Runs on exhaustion, on generator close and on error, so the
            # iterator is never leaked and no Section outlives it unexpired.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances. When cache is
        true, cache() is called on each Symbol before it is yielded.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more.

        The underlying symbol iterator is disposed when the generator
        finishes, is closed early, or fails; the most recently yielded
        Symbol is expired at that point.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                # Advancing the iterator invalidates the Symbol just yielded.
                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # Runs on exhaustion, on generator close and on error, so the
            # iterator is never leaked and no Symbol outlives it unexpired.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)
177
class Section(LLVMObject):
    """Represents a section in an object file.

    A Section is a limited-use object: after expire() has been called, only
    properties that were read beforehand (and are therefore cached) remain
    available. See the module documentation on iterators for details.
    """

    def __init__(self, ptr):
        """Construct a new section instance.

        ptr is the section iterator handle this instance reads from.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.
        """
        LLVMObject.__init__(self, ptr)

        self.expired = False

    def _check_not_expired(self):
        """Raise an Exception if this Section has been expired."""
        if self.expired:
            raise Exception('Section instance has expired.')

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.
        """
        self._check_not_expired()

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section, in bytes."""
        self._check_not_expired()

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The raw contents of the section.

        Returns a string of self.size bytes, or None if the library returns a
        null pointer for the contents.
        """
        self._check_not_expired()

        siz = self.size

        r = lib.LLVMGetSectionContents(self)
        if r:
            # The contents are not NUL-terminated, so the length must be
            # passed explicitly.
            return string_at(r, siz)
        return None

    @CachedProperty
    def address(self):
        """The address of this section."""
        self._check_not_expired()

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section.

        Raises TypeError if symbol is not a Symbol instance.
        """
        self._check_not_expired()

        # Validate explicitly rather than with assert so the check survives
        # running Python with -O.
        if not isinstance(symbol, Symbol):
            raise TypeError('symbol must be a Symbol instance.')

        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances. When cache
        is true, cache() is called on each Relocation before it is yielded.

        Each instance is a limited-use object. See this module's documentation
        on iterators for more.

        The underlying relocation iterator is disposed when the generator
        finishes, is closed early, or fails; the most recently yielded
        Relocation is expired at that point.
        """
        self._check_not_expired()

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations)
                if cache:
                    last.cache()

                yield last

                # Advancing the iterator invalidates the Relocation just
                # yielded.
                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Runs on exhaustion, on generator close and on error, so the
            # iterator is never leaked and no Relocation outlives it
            # unexpired.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        getattr(self, 'name')
        getattr(self, 'size')
        getattr(self, 'contents')
        getattr(self, 'address')

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True
class Symbol(LLVMObject):
    """Represents a symbol in an object file.

    A Symbol is a limited-use object: after expire() has been called, only
    properties that were read beforehand (and are therefore cached) remain
    available. See the module documentation on iterators for details.
    """

    def __init__(self, ptr, object_file=None):
        """Construct a new symbol instance.

        ptr is the symbol iterator handle this instance reads from.

        object_file is the ObjectFile the symbol belongs to. It is only used
        by the section property and may be omitted when the owning ObjectFile
        is not known, as is the case for Relocation.symbol.
        """
        assert isinstance(ptr, c_object_p)
        assert object_file is None or isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    def _check_not_expired(self):
        """Raise an Exception if this Symbol has been expired."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.
        """
        self._check_not_expired()

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol."""
        self._check_not_expired()

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def size(self):
        """The size of the symbol, in bytes."""
        self._check_not_expired()

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        Because this obtains a new section iterator each time it is accessed,
        calling this on a number of Symbol instances could be expensive.

        Raises Exception if the Symbol has expired or was created without an
        ObjectFile.
        """
        # The lookup below reads from this Symbol's handle, so it must still
        # be valid, exactly as for the other properties.
        self._check_not_expired()

        if self._object_file is None:
            raise Exception('Symbol is not associated with an ObjectFile.')

        # The iterator is intentionally not disposed here: the returned
        # Section keeps reading from it.
        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        return Section(sections)

    def cache(self):
        """Cache the name, address and size properties.

        The section property is not fetched here.
        """
        getattr(self, 'name')
        getattr(self, 'address')
        getattr(self, 'size')

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True

class Relocation(LLVMObject):
    """Represents a relocation definition.

    A Relocation is a limited-use object: after expire() has been called,
    only properties that were read beforehand (and are therefore cached)
    remain available. See the module documentation on iterators for details.
    """

    def __init__(self, ptr):
        """Create a new relocation instance.

        ptr is the relocation iterator handle this instance reads from.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        self.expired = False

    def _check_not_expired(self):
        """Raise an Exception if this Relocation has been expired."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

    @CachedProperty
    def offset(self):
        """The offset of this relocation."""
        self._check_not_expired()

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation.

        The returned Symbol has no associated ObjectFile, so its section
        property is unavailable.
        """
        self._check_not_expired()

        ptr = lib.LLVMGetRelocationSymbol(self)
        # A Relocation does not know its ObjectFile, so none is passed on.
        return Symbol(ptr)

    @CachedProperty
    def type_number(self):
        """The relocation type, as a number."""
        self._check_not_expired()

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str."""
        self._check_not_expired()

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The relocation's value string, as returned by the library."""
        self._check_not_expired()

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance."""
        # Only names that exist as properties on this class are listed;
        # asking for anything else would raise AttributeError.
        getattr(self, 'offset')
        getattr(self, 'symbol')
        getattr(self, 'type_number')
        getattr(self, 'type_name')
        getattr(self, 'value_string')
425
def register_library(library):
    """Register function prototypes with LLVM library instance.

    For every Object.h function used by this module, the argument types are
    set on the corresponding attribute of library. A return type is set only
    for the functions that declare one below; the others keep whatever
    return type the attribute already has.
    """
    # Marks entries whose restype must be left untouched.
    keep_restype = object()

    # (function name, argtypes, restype), in registration order.
    # NOTE(review): the two relocation string functions presumably hand
    # ownership of the returned string to the caller -- confirm against
    # llvm-c/Object.h.
    prototypes = (
        # Object.h functions
        ('LLVMCreateObjectFile', [MemoryBuffer], c_object_p),
        ('LLVMDisposeObjectFile', [ObjectFile], keep_restype),
        ('LLVMGetSections', [ObjectFile], c_object_p),
        ('LLVMDisposeSectionIterator', [c_object_p], keep_restype),
        ('LLVMIsSectionIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSection', [c_object_p], keep_restype),
        ('LLVMMoveToContainingSection', [c_object_p, c_object_p],
         keep_restype),
        ('LLVMGetSymbols', [ObjectFile], c_object_p),
        ('LLVMDisposeSymbolIterator', [c_object_p], keep_restype),
        ('LLVMIsSymbolIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSymbol', [c_object_p], keep_restype),
        ('LLVMGetSectionName', [c_object_p], c_char_p),
        ('LLVMGetSectionSize', [c_object_p], c_uint64),
        # Can't use c_char_p here as it isn't a NUL-terminated string.
        ('LLVMGetSectionContents', [c_object_p], POINTER(c_char)),
        ('LLVMGetSectionAddress', [c_object_p], c_uint64),
        ('LLVMGetSectionContainsSymbol', [c_object_p, c_object_p], bool),
        ('LLVMGetRelocations', [c_object_p], c_object_p),
        ('LLVMDisposeRelocationIterator', [c_object_p], keep_restype),
        ('LLVMIsRelocationIteratorAtEnd', [c_object_p, c_object_p], bool),
        ('LLVMMoveToNextRelocation', [c_object_p], keep_restype),
        ('LLVMGetSymbolName', [Symbol], c_char_p),
        ('LLVMGetSymbolAddress', [Symbol], c_uint64),
        ('LLVMGetSymbolSize', [Symbol], c_uint64),
        ('LLVMGetRelocationOffset', [c_object_p], c_uint64),
        ('LLVMGetRelocationSymbol', [c_object_p], c_object_p),
        ('LLVMGetRelocationType', [c_object_p], c_uint64),
        ('LLVMGetRelocationTypeName', [c_object_p], c_char_p),
        ('LLVMGetRelocationValueString', [c_object_p], c_char_p),
    )

    for function_name, argument_types, result_type in prototypes:
        function = getattr(library, function_name)
        function.argtypes = argument_types
        if result_type is not keep_restype:
            function.restype = result_type
# Obtain the LLVM library handle at import time and register this module's
# function prototypes on it. "lib" is the handle used by the classes above.
lib = get_library()
register_library(lib)
509