2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include <folly/experimental/symbolizer/Dwarf.h>
20 #include <type_traits>
22 #if FOLLY_HAVE_LIBDWARF_DWARF_H
23 #include <libdwarf/dwarf.h>
29 namespace symbolizer {
31 Dwarf::Dwarf(const ElfFile* elf) : elf_(elf) {
35 Dwarf::Section::Section(folly::StringPiece d) : is64Bit_(false), data_(d) {
40 // All following read* functions read from a StringPiece, advancing the
41 // StringPiece, and aborting if there's not enough room.
43 // Read (bitwise) one object of type T
45 typename std::enable_if<std::is_pod<T>::value, T>::type
46 read(folly::StringPiece& sp) {
47 FOLLY_SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
49 memcpy(&x, sp.data(), sizeof(T));
50 sp.advance(sizeof(T));
54 // Read ULEB (unsigned) varint value; algorithm from the DWARF spec
55 uint64_t readULEB(folly::StringPiece& sp, uint8_t& shift, uint8_t& val) {
59 val = read<uint8_t>(sp);
60 r |= ((uint64_t)(val & 0x7f) << shift);
66 uint64_t readULEB(folly::StringPiece& sp) {
69 return readULEB(sp, shift, val);
72 // Read SLEB (signed) varint value; algorithm from the DWARF spec
73 int64_t readSLEB(folly::StringPiece& sp) {
76 uint64_t r = readULEB(sp, shift, val);
78 if (shift < 64 && (val & 0x40)) {
79 r |= -(1ULL << shift); // sign extend
85 // Read a value of "section offset" type, which may be 4 or 8 bytes
86 uint64_t readOffset(folly::StringPiece& sp, bool is64Bit) {
87 return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
91 folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) {
92 FOLLY_SAFE_CHECK(len >= sp.size(), "invalid string length");
93 folly::StringPiece ret(sp.data(), len);
98 // Read a null-terminated string
99 folly::StringPiece readNullTerminated(folly::StringPiece& sp) {
100 const char* p = static_cast<const char*>(
101 memchr(sp.data(), 0, sp.size()));
102 FOLLY_SAFE_CHECK(p, "invalid null-terminated string");
103 folly::StringPiece ret(sp.data(), p);
104 sp.assign(p + 1, sp.end());
108 // Skip over padding until sp.data() - start is a multiple of alignment
109 void skipPadding(folly::StringPiece& sp, const char* start, size_t alignment) {
110 size_t remainder = (sp.data() - start) % alignment;
112 FOLLY_SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
113 sp.advance(alignment - remainder);
117 // Simplify a path -- as much as we can while not moving data around...
118 void simplifyPath(folly::StringPiece& sp) {
119 // Strip leading slashes and useless patterns (./), leaving one initial
126 // Strip leading slashes, leaving one.
127 while (sp.startsWith("//")) {
131 if (sp.startsWith("/./")) {
132 // Note 2, not 3, to keep it absolute
137 if (sp.removePrefix("./")) {
138 // Also remove any subsequent slashes to avoid making this path absolute.
139 while (sp.startsWith('/')) {
148 // Strip trailing slashes and useless patterns (/.).
154 // Strip trailing slashes, except when this is the root path.
155 while (sp.size() > 1 && sp.removeSuffix('/')) { }
157 if (sp.removeSuffix("/.")) {
167 Dwarf::Path::Path(folly::StringPiece baseDir, folly::StringPiece subDir,
168 folly::StringPiece file)
181 if (file_[0] == '/') {
187 if (!subDir_.empty() && subDir_[0] == '/') {
188 baseDir_.clear(); // subDir_ is absolute
191 simplifyPath(baseDir_);
192 simplifyPath(subDir_);
195 // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
196 if (baseDir_.empty()) {
197 swap(baseDir_, subDir_);
201 size_t Dwarf::Path::size() const {
203 bool needsSlash = false;
205 if (!baseDir_.empty()) {
206 size += baseDir_.size();
207 needsSlash = !baseDir_.endsWith('/');
210 if (!subDir_.empty()) {
212 size += subDir_.size();
213 needsSlash = !subDir_.endsWith('/');
216 if (!file_.empty()) {
218 size += file_.size();
224 size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
225 size_t totalSize = 0;
226 bool needsSlash = false;
228 auto append = [&] (folly::StringPiece sp) {
230 size_t toCopy = std::min(sp.size(), bufSize - 1);
231 memcpy(buf, sp.data(), toCopy);
235 totalSize += sp.size();
238 if (!baseDir_.empty()) {
240 needsSlash = !baseDir_.endsWith('/');
242 if (!subDir_.empty()) {
247 needsSlash = !subDir_.endsWith('/');
249 if (!file_.empty()) {
258 assert(totalSize == size());
262 void Dwarf::Path::toString(std::string& dest) const {
263 size_t initialSize = dest.size();
264 dest.reserve(initialSize + size());
265 if (!baseDir_.empty()) {
266 dest.append(baseDir_.begin(), baseDir_.end());
268 if (!subDir_.empty()) {
269 if (!dest.empty() && dest.back() != '/') {
272 dest.append(subDir_.begin(), subDir_.end());
274 if (!file_.empty()) {
275 if (!dest.empty() && dest.back() != '/') {
278 dest.append(file_.begin(), file_.end());
280 assert(dest.size() == initialSize + size());
283 // Next chunk in section
284 bool Dwarf::Section::next(folly::StringPiece& chunk) {
290 // Initial length is a uint32_t value for a 32-bit section, and
291 // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
293 auto initialLength = read<uint32_t>(chunk);
294 is64Bit_ = (initialLength == (uint32_t)-1);
295 auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
296 FOLLY_SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
297 chunk.reset(chunk.data(), length);
298 data_.assign(chunk.end(), data_.end());
302 bool Dwarf::getSection(const char* name, folly::StringPiece* section) const {
303 const ElfW(Shdr)* elfSection = elf_->getSectionByName(name);
308 *section = elf_->getSectionBody(*elfSection);
313 // Make sure that all .debug_* sections exist
314 if (!getSection(".debug_info", &info_) ||
315 !getSection(".debug_abbrev", &abbrev_) ||
316 !getSection(".debug_line", &line_) ||
317 !getSection(".debug_str", &strings_)) {
321 getSection(".debug_str", &strings_);
323 // Optional: fast address range lookup. If missing .debug_info can
324 // be used - but it's much slower (linear scan).
325 getSection(".debug_aranges", &aranges_);
328 bool Dwarf::readAbbreviation(folly::StringPiece& section,
329 DIEAbbreviation& abbr) {
331 abbr.code = readULEB(section);
332 if (abbr.code == 0) {
337 abbr.tag = readULEB(section);
339 // does this entry have children?
340 abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
343 const char* attributeBegin = section.data();
345 FOLLY_SAFE_CHECK(!section.empty(), "invalid attribute section");
346 auto attr = readAttribute(section);
347 if (attr.name == 0 && attr.form == 0) {
352 abbr.attributes.assign(attributeBegin, section.data());
356 Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(
357 folly::StringPiece& sp) {
358 return { readULEB(sp), readULEB(sp) };
361 Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset)
363 // Linear search in the .debug_abbrev section, starting at offset
364 folly::StringPiece section = abbrev_;
365 section.advance(offset);
367 Dwarf::DIEAbbreviation abbr;
368 while (readAbbreviation(section, abbr)) {
369 if (abbr.code == code) {
374 FOLLY_SAFE_CHECK(false, "could not find abbreviation code");
377 Dwarf::AttributeValue Dwarf::readAttributeValue(
378 folly::StringPiece& sp, uint64_t form, bool is64Bit) const {
381 return read<uintptr_t>(sp);
383 return readBytes(sp, read<uint8_t>(sp));
385 return readBytes(sp, read<uint16_t>(sp));
387 return readBytes(sp, read<uint32_t>(sp));
388 case DW_FORM_block: // fallthrough
389 case DW_FORM_exprloc:
390 return readBytes(sp, readULEB(sp));
391 case DW_FORM_data1: // fallthrough
393 return read<uint8_t>(sp);
394 case DW_FORM_data2: // fallthrough
396 return read<uint16_t>(sp);
397 case DW_FORM_data4: // fallthrough
399 return read<uint32_t>(sp);
400 case DW_FORM_data8: // fallthrough
402 return read<uint64_t>(sp);
405 case DW_FORM_udata: // fallthrough
406 case DW_FORM_ref_udata:
409 return read<uint8_t>(sp);
410 case DW_FORM_flag_present:
412 case DW_FORM_sec_offset: // fallthrough
413 case DW_FORM_ref_addr:
414 return readOffset(sp, is64Bit);
416 return readNullTerminated(sp);
418 return getStringFromStringSection(readOffset(sp, is64Bit));
419 case DW_FORM_indirect: // form is explicitly specified
420 return readAttributeValue(sp, readULEB(sp), is64Bit);
422 FOLLY_SAFE_CHECK(false, "invalid attribute form");
426 folly::StringPiece Dwarf::getStringFromStringSection(uint64_t offset) const {
427 FOLLY_SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
428 folly::StringPiece sp(strings_);
430 return readNullTerminated(sp);
434 * Find @address in .debug_aranges and return the offset in
435 * .debug_info for compilation unit to which this address belongs.
437 bool Dwarf::findDebugInfoOffset(uintptr_t address,
440 Section arangesSection(aranges);
441 folly::StringPiece chunk;
442 while (arangesSection.next(chunk)) {
443 auto version = read<uint16_t>(chunk);
444 FOLLY_SAFE_CHECK(version == 2, "invalid aranges version");
446 offset = readOffset(chunk, arangesSection.is64Bit());
447 auto addressSize = read<uint8_t>(chunk);
448 FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
449 auto segmentSize = read<uint8_t>(chunk);
450 FOLLY_SAFE_CHECK(segmentSize == 0, "segmented architecture not supported");
452 // Padded to a multiple of 2 addresses.
453 // Strangely enough, this is the only place in the DWARF spec that requires
455 skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t));
457 auto start = read<uintptr_t>(chunk);
458 auto length = read<uintptr_t>(chunk);
464 // Is our address in this range?
465 if (address >= start && address < start + length) {
474 * Find the @locationInfo for @address in the compilation unit represented
475 * by the @sp .debug_info entry.
476 * Returns whether the address was found.
477 * Advances @sp to the next entry in .debug_info.
479 bool Dwarf::findLocation(uintptr_t address,
480 StringPiece& infoEntry,
481 LocationInfo& locationInfo) const {
482 // For each compilation unit compiled with a DWARF producer, a
483 // contribution is made to the .debug_info section of the object
484 // file. Each such contribution consists of a compilation unit
485 // header (see Section 7.5.1.1) followed by a single
486 // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
487 // entry, together with its children.
489 // 7.5.1.1 Compilation Unit Header
490 // 1. unit_length (4B or 12B): read by Section::next
492 // 3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
493 // 4. address_size (1B)
495 Section debugInfoSection(infoEntry);
496 folly::StringPiece chunk;
497 FOLLY_SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");
499 auto version = read<uint16_t>(chunk);
500 FOLLY_SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
501 uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
502 auto addressSize = read<uint8_t>(chunk);
503 FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
505 // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
506 // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
507 // - dwarf compression tools like `dwz` may generate it.
508 // TODO(tudorb): Handle DW_TAG_partial_unit?
509 auto code = readULEB(chunk);
510 FOLLY_SAFE_CHECK(code != 0, "invalid code");
511 auto abbr = getAbbreviation(code, abbrevOffset);
512 FOLLY_SAFE_CHECK(abbr.tag == DW_TAG_compile_unit,
513 "expecting compile unit entry");
514 // Skip children entries, advance to the next compilation unit entry.
515 infoEntry.advance(chunk.end() - infoEntry.begin());
517 // Read attributes, extracting the few we care about
518 bool foundLineOffset = false;
519 uint64_t lineOffset = 0;
520 folly::StringPiece compilationDirectory;
521 folly::StringPiece mainFileName;
523 DIEAbbreviation::Attribute attr;
524 folly::StringPiece attributes = abbr.attributes;
526 attr = readAttribute(attributes);
527 if (attr.name == 0 && attr.form == 0) {
530 auto val = readAttributeValue(chunk, attr.form,
531 debugInfoSection.is64Bit());
533 case DW_AT_stmt_list:
534 // Offset in .debug_line for the line number VM program for this
536 lineOffset = boost::get<uint64_t>(val);
537 foundLineOffset = true;
540 // Compilation directory
541 compilationDirectory = boost::get<folly::StringPiece>(val);
544 // File name of main file being compiled
545 mainFileName = boost::get<folly::StringPiece>(val);
550 if (!mainFileName.empty()) {
551 locationInfo.hasMainFile = true;
552 locationInfo.mainFile = Path(compilationDirectory, "", mainFileName);
555 if (!foundLineOffset) {
559 folly::StringPiece lineSection(line_);
560 lineSection.advance(lineOffset);
561 LineNumberVM lineVM(lineSection, compilationDirectory);
563 // Execute line number VM program to find file and line
564 locationInfo.hasFileAndLine =
565 lineVM.findAddress(address, locationInfo.file, locationInfo.line);
566 return locationInfo.hasFileAndLine;
569 bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo) const {
570 locationInfo = LocationInfo();
572 if (!elf_) { // no file
576 if (!aranges_.empty()) {
577 // Fast path: find the right .debug_info entry by looking up the
578 // address in .debug_aranges.
580 if (!findDebugInfoOffset(address, aranges_, offset)) {
581 // NOTE: clang doesn't generate entries in .debug_aranges for
582 // some functions, but always generates .debug_info entries.
583 // We could read them from .debug_info but that's too slow.
584 // If .debug_aranges is present fast address lookup is assumed.
587 // Read compilation unit header from .debug_info
588 folly::StringPiece infoEntry(info_);
589 infoEntry.advance(offset);
590 findLocation(address, infoEntry, locationInfo);
594 // Slow path (linear scan): Iterate over all .debug_info entries
595 // and look for the address in each compilation unit.
596 folly::StringPiece infoEntry(info_);
597 while (!infoEntry.empty() && !locationInfo.hasFileAndLine) {
598 findLocation(address, infoEntry, locationInfo);
600 return locationInfo.hasFileAndLine;
603 Dwarf::LineNumberVM::LineNumberVM(folly::StringPiece data,
604 folly::StringPiece compilationDirectory)
605 : compilationDirectory_(compilationDirectory) {
606 Section section(data);
607 FOLLY_SAFE_CHECK(section.next(data_), "invalid line number VM");
608 is64Bit_ = section.is64Bit();
613 void Dwarf::LineNumberVM::reset() {
618 isStmt_ = defaultIsStmt_;
620 endSequence_ = false;
621 prologueEnd_ = false;
622 epilogueBegin_ = false;
627 void Dwarf::LineNumberVM::init() {
628 version_ = read<uint16_t>(data_);
629 FOLLY_SAFE_CHECK(version_ >= 2 && version_ <= 4,
630 "invalid version in line number VM");
631 uint64_t headerLength = readOffset(data_, is64Bit_);
632 FOLLY_SAFE_CHECK(headerLength <= data_.size(),
633 "invalid line number VM header length");
634 folly::StringPiece header(data_.data(), headerLength);
635 data_.assign(header.end(), data_.end());
637 minLength_ = read<uint8_t>(header);
638 if (version_ == 4) { // Version 2 and 3 records don't have this
639 uint8_t maxOpsPerInstruction = read<uint8_t>(header);
640 FOLLY_SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported");
642 defaultIsStmt_ = read<uint8_t>(header);
643 lineBase_ = read<int8_t>(header); // yes, signed
644 lineRange_ = read<uint8_t>(header);
645 opcodeBase_ = read<uint8_t>(header);
646 FOLLY_SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
647 standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data());
648 header.advance(opcodeBase_ - 1);
650 // We don't want to use heap, so we don't keep an unbounded amount of state.
651 // We'll just skip over include directories and file names here, and
652 // we'll loop again when we actually need to retrieve one.
653 folly::StringPiece sp;
654 const char* tmp = header.data();
655 includeDirectoryCount_ = 0;
656 while (!(sp = readNullTerminated(header)).empty()) {
657 ++includeDirectoryCount_;
659 includeDirectories_.assign(tmp, header.data());
664 while (readFileName(header, fn)) {
667 fileNames_.assign(tmp, header.data());
670 bool Dwarf::LineNumberVM::next(folly::StringPiece& program) {
671 Dwarf::LineNumberVM::StepResult ret;
674 } while (ret == CONTINUE);
676 return (ret == COMMIT);
679 Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index)
681 FOLLY_SAFE_CHECK(index != 0, "invalid file index 0");
684 if (index <= fileNameCount_) {
685 folly::StringPiece fileNames = fileNames_;
686 for (; index; --index) {
687 if (!readFileName(fileNames, fn)) {
694 index -= fileNameCount_;
696 folly::StringPiece program = data_;
697 for (; index; --index) {
698 FOLLY_SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
704 folly::StringPiece Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index)
707 return folly::StringPiece();
710 FOLLY_SAFE_CHECK(index <= includeDirectoryCount_,
711 "invalid include directory");
713 folly::StringPiece includeDirectories = includeDirectories_;
714 folly::StringPiece dir;
715 for (; index; --index) {
716 dir = readNullTerminated(includeDirectories);
725 bool Dwarf::LineNumberVM::readFileName(folly::StringPiece& program,
727 fn.relativeName = readNullTerminated(program);
728 if (fn.relativeName.empty()) {
731 fn.directoryIndex = readULEB(program);
732 // Skip over file size and last modified time
738 bool Dwarf::LineNumberVM::nextDefineFile(folly::StringPiece& program,
739 FileName& fn) const {
740 while (!program.empty()) {
741 auto opcode = read<uint8_t>(program);
743 if (opcode >= opcodeBase_) { // special opcode
747 if (opcode != 0) { // standard opcode
748 // Skip, slurp the appropriate number of LEB arguments
749 uint8_t argCount = standardOpcodeLengths_[opcode - 1];
757 auto length = readULEB(program);
758 // the opcode itself should be included in the length, so length >= 1
759 FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
760 read<uint8_t>(program); // extended opcode
763 if (opcode == DW_LNE_define_file) {
764 FOLLY_SAFE_CHECK(readFileName(program, fn),
765 "invalid empty file in DW_LNE_define_file");
769 program.advance(length);
776 Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(
777 folly::StringPiece& program) {
778 auto opcode = read<uint8_t>(program);
780 if (opcode >= opcodeBase_) { // special opcode
781 uint8_t adjustedOpcode = opcode - opcodeBase_;
782 uint8_t opAdvance = adjustedOpcode / lineRange_;
784 address_ += minLength_ * opAdvance;
785 line_ += lineBase_ + adjustedOpcode % lineRange_;
788 prologueEnd_ = false;
789 epilogueBegin_ = false;
794 if (opcode != 0) { // standard opcode
795 // Only interpret opcodes that are recognized by the version we're parsing;
796 // the others are vendor extensions and we should ignore them.
800 prologueEnd_ = false;
801 epilogueBegin_ = false;
804 case DW_LNS_advance_pc:
805 address_ += minLength_ * readULEB(program);
807 case DW_LNS_advance_line:
808 line_ += readSLEB(program);
810 case DW_LNS_set_file:
811 file_ = readULEB(program);
813 case DW_LNS_set_column:
814 column_ = readULEB(program);
816 case DW_LNS_negate_stmt:
819 case DW_LNS_set_basic_block:
822 case DW_LNS_const_add_pc:
823 address_ += minLength_ * ((255 - opcodeBase_) / lineRange_);
825 case DW_LNS_fixed_advance_pc:
826 address_ += read<uint16_t>(program);
828 case DW_LNS_set_prologue_end:
829 if (version_ == 2) break; // not supported in version 2
832 case DW_LNS_set_epilogue_begin:
833 if (version_ == 2) break; // not supported in version 2
834 epilogueBegin_ = true;
837 if (version_ == 2) break; // not supported in version 2
838 isa_ = readULEB(program);
842 // Unrecognized standard opcode, slurp the appropriate number of LEB
844 uint8_t argCount = standardOpcodeLengths_[opcode - 1];
852 auto length = readULEB(program);
853 // the opcode itself should be included in the length, so length >= 1
854 FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
855 auto extendedOpcode = read<uint8_t>(program);
858 switch (extendedOpcode) {
859 case DW_LNE_end_sequence:
861 case DW_LNE_set_address:
862 address_ = read<uintptr_t>(program);
864 case DW_LNE_define_file:
865 // We can't process DW_LNE_define_file here, as it would require us to
866 // use unbounded amounts of state (ie. use the heap). We'll do a second
867 // pass (using nextDefineFile()) if necessary.
869 case DW_LNE_set_discriminator:
870 discriminator_ = readULEB(program);
874 // Unrecognized extended opcode
875 program.advance(length);
879 bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file,
881 folly::StringPiece program = data_;
883 // Within each sequence of instructions, the address may only increase.
884 // Unfortunately, within the same compilation unit, sequences may appear
885 // in any order. So any sequence is a candidate if it starts at an address
886 // <= the target address, and we know we've found the target address if
887 // a candidate crosses the target address.
890 LOW_SEQ, // candidate
896 uint64_t prevFile = 0;
897 uint64_t prevLine = 0;
898 while (!program.empty()) {
899 bool seqEnd = !next(program);
901 if (state == START) {
903 state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
907 if (state == LOW_SEQ) {
908 if (address_ > target) {
909 // Found it! Note that ">" is indeed correct (not ">="), as each
910 // sequence is guaranteed to have one entry past-the-end (emitted by
911 // DW_LNE_end_sequence)
915 auto fn = getFileName(prevFile);
916 file = Path(compilationDirectory_,
917 getIncludeDirectory(fn.directoryIndex),
935 } // namespace symbolizer