1 #===- object.py - Python Object Bindings --------------------*- python -*--===#
3 # The LLVM Compiler Infrastructure
5 # This file is distributed under the University of Illinois Open Source
6 # License. See LICENSE.TXT for details.
8 #===------------------------------------------------------------------------===#
14 This module provides an interface for reading information from object files
15 (e.g. binary executables and libraries).
17 Using this module, you can obtain information about an object file's sections,
18 symbols, and relocations. These are represented by the classes ObjectFile,
19 Section, Symbol, and Relocation, respectively.
24 The only way to use this module is to start by creating an ObjectFile. You can
25 create an ObjectFile by loading a file (specified by its path) or by creating an
26 llvm.core.MemoryBuffer and loading that.
28 Once you have an object file, you can inspect its sections and symbols directly
29 by calling get_sections() and get_symbols() respectively. To inspect
30 relocations, call get_relocations() on a Section instance.
35 The LLVM bindings expose iteration over sections, symbols, and relocations in a
36 way that allows only one instance to be operated on at a time. This is
37 slightly annoying from a Python perspective, as it isn't very Pythonic to have
38 objects that "expire" while Python code can still hold references to them.
40 To aid working around this limitation, each Section, Symbol, and Relocation
41 instance caches its properties after first access. So, if the underlying
42 iterator is advanced, the properties can still be obtained provided they have
43 already been retrieved.
45 In addition, we also provide a "cache" method on each class to cache all
46 available data. You can call this on each obtained instance. Or, you can pass
47 cache=True to the appropriate get_XXX() method to have this done for you.
49 Here are some examples on how to perform iteration:
51 obj = ObjectFile(filename='/bin/ls')
53 # This is OK. Each Section is only accessed inside its own iteration slot.
55 for section in obj.get_sections():
56 section_names.append(section.name)
58 # This is NOT OK. You perform a lookup after the object has expired.
59 symbols = list(obj.get_symbols())
60 for symbol in symbols:
61 print symbol.name # This raises because the object has expired.
63 # In this example, we mix a working and failing scenario.
65 for symbol in obj.get_symbols():
66 symbols.append(symbol)
69 for symbol in symbols:
70 print symbol.name # OK
71 print symbol.address # NOT OK. We didn't look up this property before.
73 # Cache everything up front.
74 symbols = list(obj.get_symbols(cache=True))
75 for symbol in symbols:
76 print symbol.name # OK
80 from ctypes import c_char_p
81 from ctypes import c_char
82 from ctypes import POINTER
83 from ctypes import c_uint64
84 from ctypes import string_at
86 from .common import CachedProperty
87 from .common import LLVMObject
88 from .common import c_object_p
89 from .common import get_library
90 from .core import MemoryBuffer
100 class ObjectFile(LLVMObject):
101 """Represents an object/binary file."""
103 def __init__(self, filename=None, contents=None):
104 """Construct an instance from a filename or binary data.
106 filename must be a path to a file that can be opened with open().
107 contents must be an llvm.core.MemoryBuffer instance; this is asserted, so
108 native Python buffer types (like str) are not accepted as-is.
111 assert isinstance(contents, MemoryBuffer)
113 if filename is not None:
114 contents = MemoryBuffer(filename=filename)
117 raise Exception('No input found.')
119 ptr = lib.LLVMCreateObjectFile(contents)
120 LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
121 self.take_ownership(contents)
123 def get_sections(self, cache=False):
124 """Obtain the sections in this object file.
126 This is a generator for llvm.object.Section instances.
128 Sections are exposed as limited-use objects. See the module's
129 documentation on iterators for more.
131 sections = lib.LLVMGetSections(self)
134 if lib.LLVMIsSectionIteratorAtEnd(self, sections):
137 last = Section(sections)
143 lib.LLVMMoveToNextSection(sections)
149 lib.LLVMDisposeSectionIterator(sections)
151 def get_symbols(self, cache=False):
152 """Obtain the symbols in this object file.
154 This is a generator for llvm.object.Symbol instances.
156 Each Symbol instance is a limited-use object. See this module's
157 documentation on iterators for more.
159 symbols = lib.LLVMGetSymbols(self)
162 if lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
165 last = Symbol(symbols, self)
171 lib.LLVMMoveToNextSymbol(symbols)
177 lib.LLVMDisposeSymbolIterator(symbols)
179 class Section(LLVMObject):
180 """Represents a section in an object file."""
182 def __init__(self, ptr):
183 """Construct a new section instance.
185 Section instances can currently only be created from an ObjectFile
186 instance. Therefore, this constructor should not be used outside of
189 LLVMObject.__init__(self, ptr)
195 """Obtain the string name of the section.
197 This is typically something like '.dynsym' or '.rodata'.
200 raise Exception('Section instance has expired.')
202 return lib.LLVMGetSectionName(self)
206 """The size of the section in bytes, as a long."""
208 raise Exception('Section instance has expired.')
210 return lib.LLVMGetSectionSize(self)
215 raise Exception('Section instance has expired.')
219 r = lib.LLVMGetSectionContents(self)
221 return string_at(r, siz)
226 """The address of this section in bytes, as a long."""
228 raise Exception('Section instance has expired.')
230 return lib.LLVMGetSectionAddress(self)
232 def has_symbol(self, symbol):
233 """Returns whether a Symbol instance is present in this Section."""
235 raise Exception('Section instance has expired.')
237 assert isinstance(symbol, Symbol)
238 return lib.LLVMGetSectionContainsSymbol(self, symbol)
240 def get_relocations(self, cache=False):
241 """Obtain the relocations in this Section.
243 This is a generator for llvm.object.Relocation instances.
245 Each instance is a limited-use object. See this module's documentation
246 on iterators for more.
249 raise Exception('Section instance has expired.')
251 relocations = lib.LLVMGetRelocations(self)
254 if lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
257 last = Relocation(relocations)
263 lib.LLVMMoveToNextRelocation(relocations)
269 lib.LLVMDisposeRelocationIterator(relocations)
272 """Cache properties of this Section.
274 This can be called as a workaround to the single active Section
275 limitation. When called, the properties of the Section are fetched so
276 they are still available after the Section has been marked inactive.
278 getattr(self, 'name')
279 getattr(self, 'size')
280 getattr(self, 'contents')
281 getattr(self, 'address')
284 """Expire the section.
286 This is called internally by the section iterator.
290 class Symbol(LLVMObject):
291 """Represents a symbol in an object file."""
292 def __init__(self, ptr, object_file):
293 assert isinstance(ptr, c_object_p)
294 assert isinstance(object_file, ObjectFile)
296 LLVMObject.__init__(self, ptr)
299 self._object_file = object_file
303 """The str name of the symbol.
305 This is often a function or variable name. Keep in mind that name
306 mangling could be in effect.
309 raise Exception('Symbol instance has expired.')
311 return lib.LLVMGetSymbolName(self)
315 """The address of this symbol in bytes, as a long."""
317 raise Exception('Symbol instance has expired.')
319 return lib.LLVMGetSymbolAddress(self)
323 """The size of the symbol in bytes, as a long."""
325 raise Exception('Symbol instance has expired.')
327 return lib.LLVMGetSymbolSize(self)
331 """The Section to which this Symbol belongs.
333 The returned Section instance does not expire, unlike Sections that are
334 commonly obtained through iteration.
336 Because this obtains a new section iterator each time it is accessed,
337 calling this on a number of Symbol instances could be expensive.
339 sections = lib.LLVMGetSections(self._object_file)
340 lib.LLVMMoveToContainingSection(sections, self)
342 return Section(sections)
345 """Cache all cacheable properties."""
346 getattr(self, 'name')
347 getattr(self, 'address')
348 getattr(self, 'size')
351 """Mark the object as expired to prevent future API accesses.
353 This is called internally by this module and it is unlikely that
354 external callers have a legitimate reason for using it.
358 class Relocation(LLVMObject):
359 """Represents a relocation definition."""
360 def __init__(self, ptr):
361 """Create a new relocation instance.
363 Relocations are created from objects derived from Section instances.
364 Therefore, this constructor should not be called outside of this
365 module. See Section.get_relocations() for the proper method to obtain
366 a Relocation instance.
368 assert isinstance(ptr, c_object_p)
370 LLVMObject.__init__(self, ptr)
376 """The offset of this relocation in bytes, as a long."""
378 raise Exception('Relocation instance has expired.')
380 return lib.LLVMGetRelocationOffset(self)
384 """The Symbol corresponding to this Relocation."""
386 raise Exception('Relocation instance has expired.')
388 ptr = lib.LLVMGetRelocationSymbol(self)
392 def type_number(self):
393 """The relocation type, as a long."""
395 raise Exception('Relocation instance has expired.')
397 return lib.LLVMGetRelocationType(self)
401 """The relocation type's name, as a str."""
403 raise Exception('Relocation instance has expired.')
405 return lib.LLVMGetRelocationTypeName(self)
408 def value_string(self):
410 raise Exception('Relocation instance has expired.')
412 return lib.LLVMGetRelocationValueString(self)
415 """Expire this instance, making future API accesses fail."""
419 """Cache all cacheable properties on this instance."""
420 getattr(self, 'address')
421 getattr(self, 'offset')
422 getattr(self, 'symbol')
423 getattr(self, 'type')
424 getattr(self, 'type_name')
425 getattr(self, 'value_string')
def register_library(library):
    """Declare ctypes prototypes for the object-file functions on *library*.

    Each entry in the table below names one function looked up on
    ``library``. Its ``argtypes`` attribute is always assigned; ``restype``
    is assigned only when the entry supplies one, and is otherwise left
    exactly as the function object had it.

    Entries are applied in table order, so the first function that cannot
    be looked up on ``library`` is the one that raises.
    """
    # Marker meaning "do not assign restype for this function".
    leave_restype = object()

    prototypes = (
        # Object file lifetime.
        ('LLVMCreateObjectFile', [MemoryBuffer], c_object_p),
        ('LLVMDisposeObjectFile', [ObjectFile], leave_restype),

        # Section iteration.
        ('LLVMGetSections', [ObjectFile], c_object_p),
        ('LLVMDisposeSectionIterator', [c_object_p], leave_restype),
        ('LLVMIsSectionIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSection', [c_object_p], leave_restype),
        ('LLVMMoveToContainingSection', [c_object_p, c_object_p],
         leave_restype),

        # Symbol iteration.
        ('LLVMGetSymbols', [ObjectFile], c_object_p),
        ('LLVMDisposeSymbolIterator', [c_object_p], leave_restype),
        ('LLVMIsSymbolIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSymbol', [c_object_p], leave_restype),

        # Section accessors.
        ('LLVMGetSectionName', [c_object_p], c_char_p),
        ('LLVMGetSectionSize', [c_object_p], c_uint64),
        # The contents are not a NUL-terminated string, so c_char_p cannot
        # be used for the return type here.
        ('LLVMGetSectionContents', [c_object_p], POINTER(c_char)),
        ('LLVMGetSectionAddress', [c_object_p], c_uint64),
        ('LLVMGetSectionContainsSymbol', [c_object_p, c_object_p], bool),

        # Relocation iteration.
        ('LLVMGetRelocations', [c_object_p], c_object_p),
        ('LLVMDisposeRelocationIterator', [c_object_p], leave_restype),
        ('LLVMIsRelocationIteratorAtEnd', [c_object_p, c_object_p], bool),
        ('LLVMMoveToNextRelocation', [c_object_p], leave_restype),

        # Symbol accessors.
        ('LLVMGetSymbolName', [Symbol], c_char_p),
        ('LLVMGetSymbolAddress', [Symbol], c_uint64),
        ('LLVMGetSymbolSize', [Symbol], c_uint64),

        # Relocation accessors.
        ('LLVMGetRelocationOffset', [c_object_p], c_uint64),
        ('LLVMGetRelocationSymbol', [c_object_p], c_object_p),
        ('LLVMGetRelocationType', [c_object_p], c_uint64),
        ('LLVMGetRelocationTypeName', [c_object_p], c_char_p),
        ('LLVMGetRelocationValueString', [c_object_p], c_char_p),
    )

    for symbol_name, parameter_types, return_type in prototypes:
        function = getattr(library, symbol_name)
        function.argtypes = parameter_types
        if return_type is not leave_restype:
            function.restype = return_type
# Module-level side effect: apply the prototype declarations to `lib`.
# NOTE(review): `lib` is not assigned anywhere in the visible listing;
# presumably it comes from get_library() -- confirm against the full file.
509 register_library(lib)