return ptr != end ? ptr : nullptr;
}
+/**
+ * Visit the entries of the symbol table held in `section`, in file order,
+ * stopping at the first entry for which `fn` returns true.
+ *
+ * Throws (via enforce) when the section's entry size is not
+ * sizeof(ElfW(Sym)). Returns the entry that `fn` accepted, or nullptr if it
+ * rejected every entry.
+ */
+template <class Fn>
+const ElfW(Sym)* ElfFile::iterateSymbols(const ElfW(Shdr)& section, Fn fn)
+    const {
+  enforce(section.sh_entsize == sizeof(ElfW(Sym)),
+          "invalid entry size in symbol table");
+
+  const ElfW(Sym)* const first = &at<ElfW(Sym)>(section.sh_offset);
+  const ElfW(Sym)* const last =
+      first + (section.sh_size / section.sh_entsize);
+
+  for (const ElfW(Sym)* cur = first; cur < last; ++cur) {
+    if (fn(*cur)) {
+      return cur;
+    }
+  }
+
+  return nullptr;
+}
+
+/**
+ * Same contract as iterateSymbols(), except that `fn` is only consulted for
+ * entries whose ELF32_ST_TYPE(st_info) equals `type`; all other entries are
+ * skipped.
+ */
+template <class Fn>
+const ElfW(Sym)* ElfFile::iterateSymbolsWithType(const ElfW(Shdr)& section,
+                                                 uint32_t type, Fn fn) const {
+  // N.B. st_info has the same representation on 32- and 64-bit platforms
+  auto typeFilter = [&](const ElfW(Sym)& candidate) -> bool {
+    if (ELF32_ST_TYPE(candidate.st_info) != type) {
+      return false;
+    }
+    return static_cast<bool>(fn(candidate));
+  };
+
+  return iterateSymbols(section, typeFilter);
+}
} // namespace symbolizer
} // namespace folly
ElfFile::Symbol ElfFile::getDefinitionByAddress(uintptr_t address) const {
Symbol foundSymbol {nullptr, nullptr};
- auto find = [&] (const ElfW(Shdr)& section) {
- enforce(section.sh_entsize == sizeof(ElfW(Sym)),
- "invalid entry size in symbol table");
-
- const ElfW(Sym)* sym = &at<ElfW(Sym)>(section.sh_offset);
- const ElfW(Sym)* end = &at<ElfW(Sym)>(section.sh_offset + section.sh_size);
- for (; sym != end; ++sym) {
- // st_info has the same representation on 32- and 64-bit platforms
- auto type = ELF32_ST_TYPE(sym->st_info);
-
- // TODO(tudorb): Handle STT_TLS, but then we'd have to understand
- // thread-local relocations. If all we're looking up is functions
- // (instruction pointers), it doesn't matter, though.
- if (type != STT_OBJECT && type != STT_FUNC) {
- continue;
+ // Per-section callback: records in foundSymbol the first defined symbol
+ // whose [st_value, st_value + st_size) range contains address, and
+ // returns true to stop the section iteration once one is found.
+ auto findSection = [&](const ElfW(Shdr)& section) {
+ auto findSymbols = [&](const ElfW(Sym)& sym) {
+ if (sym.st_shndx == SHN_UNDEF) {
+ return false; // not a definition
}
- if (sym->st_shndx == SHN_UNDEF) {
- continue; // not a definition
- }
- if (address >= sym->st_value && address < sym->st_value + sym->st_size) {
+ if (address >= sym.st_value && address < sym.st_value + sym.st_size) {
foundSymbol.first = &section;
- foundSymbol.second = sym;
+ foundSymbol.second = &sym;
return true;
}
+
+ return false;
+ };
+
+ // TODO(tudorb): Handle STT_TLS, but then we'd have to understand
+ // thread-local relocations. If all we're looking up is functions
+ // (instruction pointers), it doesn't matter, though.
+ return iterateSymbolsWithType(section, STT_OBJECT, findSymbols) ||
+ iterateSymbolsWithType(section, STT_FUNC, findSymbols);
+ };
+
+ // Try the .dynsym section first if it exists, it's smaller.
+ (iterateSectionsWithType(SHT_DYNSYM, findSection) ||
+ iterateSectionsWithType(SHT_SYMTAB, findSection));
+
+ return foundSymbol;
+}
+
+ElfFile::Symbol ElfFile::getSymbolByName(const char* name) const {
+ Symbol foundSymbol{nullptr, nullptr};
+
+ auto findSection = [&](const ElfW(Shdr)& section) -> bool {
+ // This section has no string table associated w/ its symbols; hence we
+ // can't get names for them
+ if (section.sh_link == SHN_UNDEF) {
+ return false;
}
- return false;
+ // Look the string table up once per section rather than once per symbol,
+ // and skip the section when sh_link does not name a valid section
+ // instead of dereferencing a null section header.
+ const ElfW(Shdr)* stringTable = getSectionByIndex(section.sh_link);
+ if (stringTable == nullptr) {
+ return false;
+ }
+
+ auto findSymbols = [&](const ElfW(Sym)& sym) -> bool {
+ if (sym.st_shndx == SHN_UNDEF) {
+ return false; // not a definition
+ }
+ if (sym.st_name == 0) {
+ return false; // no name for this symbol
+ }
+ const char* sym_name = getString(*stringTable, sym.st_name);
+ if (strcmp(sym_name, name) == 0) {
+ foundSymbol.first = &section;
+ foundSymbol.second = &sym;
+ return true;
+ }
+
+ return false;
+ };
+
+ return iterateSymbolsWithType(section, STT_OBJECT, findSymbols) ||
+ iterateSymbolsWithType(section, STT_FUNC, findSymbols);
};
// Try the .dynsym section first if it exists, it's smaller.
- (iterateSectionsWithType(SHT_DYNSYM, find) ||
- iterateSectionsWithType(SHT_SYMTAB, find));
+ iterateSectionsWithType(SHT_DYNSYM, findSection) ||
+ iterateSectionsWithType(SHT_SYMTAB, findSection);
return foundSymbol;
}
+/**
+ * Find a section whose in-memory range [sh_addr, sh_addr + sh_size) contains
+ * addr. The result is whatever iterateSections() returns for the predicate
+ * below; callers in this class treat a null result as "no such section".
+ */
+const ElfW(Shdr)* ElfFile::getSectionContainingAddress(ElfW(Addr) addr) const {
+  return iterateSections([&](const ElfW(Shdr)& sh) -> bool {
+    // Sections without SHF_ALLOC never occupy memory and carry sh_addr == 0,
+    // so a plain range test could match them against small addresses.
+    if ((sh.sh_flags & SHF_ALLOC) == 0) {
+      return false;
+    }
+    // Compare via subtraction: sh_addr + sh_size could wrap around for a
+    // malformed header, whereas addr - sh_addr cannot once addr >= sh_addr.
+    return (addr >= sh.sh_addr) && ((addr - sh.sh_addr) < sh.sh_size);
+  });
+}
+
const char* ElfFile::getSymbolName(Symbol symbol) const {
if (!symbol.first || !symbol.second) {
return nullptr;
namespace folly {
namespace symbolizer {
+/**
+ * Throw std::runtime_error when `v` is false. The exception message is built
+ * by handing every element of `args` to folly::to<std::string>. Does nothing
+ * when `v` is true.
+ */
+template <class... Args>
+inline void enforce(bool v, Args... args) {
+  if (UNLIKELY(!v)) {
+    const std::string message = folly::to<std::string>(args...);
+    throw std::runtime_error(message);
+  }
+}
+
/**
* ELF file parser.
*
template <class Fn>
const ElfW(Shdr)* iterateSectionsWithType(uint32_t type, Fn fn) const;
+ /**
+ * Iterate over all symbols within a given section.
+ *
+ * fn is called with each symbol (as const ElfW(Sym)&) in turn; iteration
+ * stops at the first symbol for which fn returns true.
+ *
+ * Returns a pointer to the current ("found") symbol when fn returned true,
+ * or nullptr if fn returned false for all symbols.
+ *
+ * iterateSymbolsWithType() behaves the same way, but only passes symbols
+ * whose type (ELF32_ST_TYPE of st_info) equals the given type to fn.
+ *
+ * Both throw std::runtime_error if the section's sh_entsize is not
+ * sizeof(ElfW(Sym)).
+ */
+ template <class Fn>
+ const ElfW(Sym)* iterateSymbols(const ElfW(Shdr)& section, Fn fn) const;
+ template <class Fn>
+ const ElfW(Sym)* iterateSymbolsWithType(const ElfW(Shdr)& section,
+ uint32_t type, Fn fn) const;
+
/**
* Find symbol definition by address.
* Note that this is the file virtual address, so you need to undo
typedef std::pair<const ElfW(Shdr)*, const ElfW(Sym)*> Symbol;
Symbol getDefinitionByAddress(uintptr_t address) const;
+ /**
+ * Find symbol definition by name.
+ *
+ * Only defined symbols (st_shndx != SHN_UNDEF) of type STT_OBJECT or
+ * STT_FUNC that have a name are considered. SHT_DYNSYM sections are
+ * searched before SHT_SYMTAB sections. name is compared with strcmp, so it
+ * must be a NUL-terminated string.
+ *
+ * If a symbol with this name cannot be found, a <nullptr, nullptr> Symbol
+ * will be returned. This is O(N) in the number of symbols in the file.
+ */
+ Symbol getSymbolByName(const char* name) const;
+
+  /**
+   * Get the value of a symbol.
+   *
+   * Reads a T from the file at the location that corresponds to the
+   * symbol's st_value inside the section named by its st_shndx.
+   *
+   * Throws std::runtime_error (via enforce) if symbol is null, if its
+   * section index does not resolve to a section, or if valueAt() rejects
+   * the address.
+   */
+  template <class T>
+  const T& getSymbolValue(const ElfW(Sym)* symbol) const {
+    // getSymbolByName() reports "not found" with a null symbol pointer;
+    // fail loudly here rather than dereferencing it.
+    enforce(symbol != nullptr, "Symbol is null");
+
+    const ElfW(Shdr)* section = getSectionByIndex(symbol->st_shndx);
+    enforce(section, "Symbol's section index is invalid");
+
+    return valueAt<T>(*section, symbol->st_value);
+  }
+
+ /**
+ * Get the value of the object stored at the given address.
+ *
+ * This is the function that you want to use in conjunction with
+ * getSymbolValue() to follow pointers. For example, to get the value of
+ * a char* symbol, you'd do something like this:
+ *
+ * auto sym = getSymbolByName("someGlobalValue");
+ * auto addr = getSymbolValue<ElfW(Addr)>(sym.second);
+ * const char* str = &getAddressValue<const char>(addr);
+ *
+ * Throws std::runtime_error (via enforce) if no section contains addr, or
+ * if valueAt() rejects the address for the section that was found.
+ */
+ template <class T>
+ const T& getAddressValue(const ElfW(Addr) addr) const {
+ const ElfW(Shdr)* section = getSectionContainingAddress(addr);
+ enforce(section, "Address does not refer to existing section");
+
+ return valueAt<T>(*section, addr);
+ }
+
/**
* Retrieve symbol name.
*/
const char* getSymbolName(Symbol symbol) const;
+ /**
+ * Find the section containing the given address.
+ *
+ * The address is matched against each section's sh_addr / sh_size range.
+ * Callers in this class treat a null result as "no section contains addr".
+ */
+ const ElfW(Shdr)* getSectionContainingAddress(ElfW(Addr) addr) const;
+
private:
void init();
void destroy();
template <class T>
const typename std::enable_if<std::is_pod<T>::value, T>::type&
- at(off_t offset) const {
+ at(ElfW(Off) offset) const {
+ // Returns a reference to the T that starts offset bytes into the mmapped
+ // file. Throws std::runtime_error (via enforce) unless the whole object
+ // lies inside the mapping. The test uses a subtraction because
+ // offset + sizeof(T) can wrap around for a huge offset read from a corrupt
+ // file, which would make a sum-based comparison pass.
+ enforce(offset <= length_ && sizeof(T) <= length_ - offset,
+ "Offset is not contained within our mmapped file");
+
return *reinterpret_cast<T*>(file_ + offset);
}
+  /**
+   * Read a T located at virtual address addr, which must lie entirely
+   * inside section, by translating the address into a file offset.
+   *
+   * Throws std::runtime_error (via enforce) if the file is not an
+   * executable or shared object, if the section has no bytes in the file,
+   * or if [addr, addr + sizeof(T)) is not within the section.
+   */
+  template <class T>
+  const T& valueAt(const ElfW(Shdr)& section, const ElfW(Addr) addr) const {
+    // For executables and shared objects, st_value holds a virtual address
+    // that refers to the memory owned by sections. Since we didn't map the
+    // sections into the addresses that they're expecting (sh_addr), but
+    // instead just mmapped the entire file directly, we need to translate
+    // between addresses and offsets into the file.
+    //
+    // TODO: For other file types, st_value holds a file offset directly. Since
+    //       I don't have a use-case for that right now, just assert that
+    //       nobody wants this. We can always add it later.
+    enforce(elfHeader().e_type == ET_EXEC || elfHeader().e_type == ET_DYN,
+            "Only exectuables and shared objects are supported");
+
+    // SHT_NOBITS sections (such as .bss) occupy no space in the file, so the
+    // bytes at sh_offset belong to something else; refuse to read them.
+    enforce(section.sh_type != SHT_NOBITS,
+            "Section has no contents in the file");
+
+    // Range check written with subtractions so that neither addr + sizeof(T)
+    // nor sh_addr + sh_size can wrap around on malformed input.
+    enforce(addr >= section.sh_addr &&
+            sizeof(T) <= section.sh_size &&
+            (addr - section.sh_addr) <= (section.sh_size - sizeof(T)),
+            "Address is not contained within the provided segment");
+
+    return at<T>(section.sh_offset + (addr - section.sh_addr));
+  }
+
int fd_;
char* file_; // mmap() location
size_t length_; // mmap() length
uintptr_t baseAddress_;
};
-template <class... Args>
-inline void enforce(bool v, Args... args) {
- if (UNLIKELY(!v)) {
- throw std::runtime_error(folly::to<std::string>(args...));
- }
-}
-
} // namespace symbolizer
} // namespace folly
--- /dev/null
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "folly/experimental/symbolizer/Elf.h"
+
+using folly::symbolizer::ElfFile;
+
+// Symbols that the tests below look up by name in this binary's symbol table.
+// Be careful with their type signatures so that the names are not mangled.
+uint64_t kIntegerValue = 1234567890UL;
+const char* kStringValue = "coconuts";
+
+// Test fixture: every test gets an ElfFile opened on the test binary itself.
+class ElfTest : public ::testing::Test {
+ protected:
+  // Parsed view of the binary named by binaryPath.
+  ElfFile elfFile_;
+
+ public:
+  // Path to the test binary itself; set by main()
+  static std::string binaryPath;
+
+  ElfTest() : elfFile_(binaryPath.c_str()) {}
+
+  virtual ~ElfTest() {}
+};
+
+std::string ElfTest::binaryPath;
+
+// Looks up the global kIntegerValue by name and checks that the bytes read
+// from the ELF file equal the value of the variable.
+TEST_F(ElfTest, IntegerValue) {
+  auto sym = elfFile_.getSymbolByName("kIntegerValue");
+  // ASSERT rather than EXPECT: a failed lookup yields null pointers, and the
+  // getSymbolValue() call below dereferences sym.second.
+  ASSERT_NE(nullptr, sym.first) <<
+    "Failed to look up symbol kIntegerValue";
+  EXPECT_EQ(kIntegerValue, elfFile_.getSymbolValue<uint64_t>(sym.second));
+}
+
+// Looks up the global pointer kStringValue by name, follows the address it
+// holds, and checks that the string found there matches.
+TEST_F(ElfTest, PointerValue) {
+  auto sym = elfFile_.getSymbolByName("kStringValue");
+  // ASSERT rather than EXPECT: a failed lookup yields null pointers, and the
+  // getSymbolValue() call below dereferences sym.second.
+  ASSERT_NE(nullptr, sym.first) <<
+    "Failed to look up symbol kStringValue";
+  ElfW(Addr) addr = elfFile_.getSymbolValue<ElfW(Addr)>(sym.second);
+  const char *str = &elfFile_.getAddressValue<const char>(addr);
+  EXPECT_STREQ(kStringValue, str);
+}
+
+// Entry point: hand the command line to gtest and the flag parser, record
+// the path of this binary for the fixture, then run every test.
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  // argv[0] is read only after both parsers have processed the arguments.
+  ElfTest::binaryPath = argv[0];
+
+  const int status = RUN_ALL_TESTS();
+  return status;
+}