From 351f83be64057380877615153fe5dc50308ab017 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 7 Aug 2012 11:46:57 +0000 Subject: [PATCH] Fix the representation of debug line table in DebugInfo LLVM library, and "instruction address -> file/line" lookup. Instead of a plain collection of rows, the debug line table for a compilation unit is now treated as a number of row ranges describing sequences (series of contiguous machine instructions). The sequences are not always listed in the order of increasing address, so the previously used std::lower_bound() sometimes produced wrong results. Now the instruction address lookup consists of two stages: finding the correct sequence, and searching for the address in the range of rows for this sequence. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161414 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFContext.cpp | 4 +- lib/DebugInfo/DWARFDebugLine.cpp | 117 ++++++++++++------ lib/DebugInfo/DWARFDebugLine.h | 61 ++++++++- .../Inputs/dwarfdump-test4.elf-x86-64 | Bin 0 -> 7689 bytes test/DebugInfo/dwarfdump-test.test | 6 + 5 files changed, 140 insertions(+), 48 deletions(-) create mode 100755 test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index a4e0d8eae43..797662b083f 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -167,9 +167,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); if (lineTable) { // Get the index of the row we're looking for in the line table. - uint64_t hiPC = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned( - cu, DW_AT_high_pc, -1ULL); - uint32_t rowIndex = lineTable->lookupAddress(address, hiPC); + uint32_t rowIndex = lineTable->lookupAddress(address); if (rowIndex != -1U) { const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; // Take file/line info from the line table. 
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 117fa31aa86..d99575d8003 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -95,14 +95,46 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { DWARFDebugLine::State::~State() {} void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) { + if (Sequence::Empty) { + // Record the beginning of instruction sequence. + Sequence::Empty = false; + Sequence::LowPC = Address; + Sequence::FirstRowIndex = row; + } ++row; // Increase the row number. LineTable::appendRow(*this); + if (EndSequence) { + // Record the end of instruction sequence. + Sequence::HighPC = Address; + Sequence::LastRowIndex = row; + if (Sequence::isValid()) + LineTable::appendSequence(*this); + Sequence::reset(); + } Row::postAppend(); } +void DWARFDebugLine::State::finalize() { + row = DoneParsingLineTable; + if (!Sequence::Empty) { + fprintf(stderr, "warning: last sequence in debug line table is not " + "terminated!\n"); + } + // Sort all sequences so that address lookup will work faster. + if (!Sequences.empty()) { + std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC); + // Note: actually, instruction address ranges of sequences should not + // overlap (in shared objects and executables). If they do, the address + // lookup would still work, though, but result would be ambiguous. + // We don't report warning in this case. For example, + // sometimes .so compiled from multiple object files contains a few + // rudimentary sequences for address ranges [0x0, 0xsomething). 
+ } +} + DWARFDebugLine::DumpingState::~DumpingState() {} -void DWARFDebugLine::DumpingState::finalize(uint32_t offset) { +void DWARFDebugLine::DumpingState::finalize() { LineTable::dump(OS); } @@ -180,8 +212,9 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" " have ended at 0x%8.8x but it ended ad 0x%8.8x\n", prologue_offset, end_prologue_offset, *offset_ptr); + return false; } - return end_prologue_offset; + return true; } bool @@ -430,47 +463,53 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, } } - state.finalize(*offset_ptr); + state.finalize(); return end_offset; } -static bool findMatchingAddress(const DWARFDebugLine::Row& row1, - const DWARFDebugLine::Row& row2) { - return row1.Address < row2.Address; -} - uint32_t -DWARFDebugLine::LineTable::lookupAddress(uint64_t address, - uint64_t cu_high_pc) const { - uint32_t index = UINT32_MAX; - if (!Rows.empty()) { - // Use the lower_bound algorithm to perform a binary search since we know - // that our line table data is ordered by address. - DWARFDebugLine::Row row; - row.Address = address; - typedef std::vector::const_iterator iterator; - iterator begin_pos = Rows.begin(); - iterator end_pos = Rows.end(); - iterator pos = std::lower_bound(begin_pos, end_pos, row, - findMatchingAddress); - if (pos == end_pos) { - if (address < cu_high_pc) - return Rows.size()-1; - } else { - // Rely on fact that we are using a std::vector and we can do - // pointer arithmetic to find the row index (which will be one less - // that what we found since it will find the first position after - // the current address) since std::vector iterators are just - // pointers to the container type. 
- index = pos - begin_pos; - if (pos->Address > address) { - if (index > 0) - --index; - else - index = UINT32_MAX; - } - } +DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { + uint32_t unknown_index = UINT32_MAX; + if (Sequences.empty()) + return unknown_index; + // First, find an instruction sequence containing the given address. + DWARFDebugLine::Sequence sequence; + sequence.LowPC = address; + SequenceIter first_seq = Sequences.begin(); + SequenceIter last_seq = Sequences.end(); + SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence, + DWARFDebugLine::Sequence::orderByLowPC); + DWARFDebugLine::Sequence found_seq; + if (seq_pos == last_seq) { + found_seq = Sequences.back(); + } else if (seq_pos->LowPC == address) { + found_seq = *seq_pos; + } else { + if (seq_pos == first_seq) + return unknown_index; + found_seq = *(seq_pos - 1); + } + if (!found_seq.containsPC(address)) + return unknown_index; + // Search for instruction address in the rows describing the sequence. + // Rows are stored in a vector, so we may use arithmetical operations with + // iterators. + DWARFDebugLine::Row row; + row.Address = address; + RowIter first_row = Rows.begin() + found_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + found_seq.LastRowIndex; + RowIter row_pos = std::lower_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + if (row_pos == last_row) { + return found_seq.LastRowIndex - 1; + } + uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row); + if (row_pos->Address > address) { + if (row_pos == first_row) + return unknown_index; + else + index--; } - return index; // Failed to find address. 
+ return index; } diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index a8c0669b738..6382b45a93a 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -88,6 +88,10 @@ public: void reset(bool default_is_stmt); void dump(raw_ostream &OS) const; + static bool orderByAddress(const Row& LHS, const Row& RHS) { + return LHS.Address < RHS.Address; + } + // The program-counter value corresponding to a machine instruction // generated by the compiler. uint64_t Address; @@ -125,21 +129,63 @@ public: EpilogueBegin:1; }; + // Represents a series of contiguous machine instructions. Line table for each + // compilation unit may consist of multiple sequences, which are not + // guaranteed to be in the order of ascending instruction address. + struct Sequence { + // Sequence describes instructions at address range [LowPC, HighPC) + // and is described by line table rows [FirstRowIndex, LastRowIndex). + uint64_t LowPC; + uint64_t HighPC; + unsigned FirstRowIndex; + unsigned LastRowIndex; + bool Empty; + + Sequence() { reset(); } + void reset() { + LowPC = 0; + HighPC = 0; + FirstRowIndex = 0; + LastRowIndex = 0; + Empty = true; + } + static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) { + return LHS.LowPC < RHS.LowPC; + } + bool isValid() const { + return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); + } + bool containsPC(uint64_t pc) const { + return (LowPC <= pc && pc < HighPC); + } + }; + struct LineTable { void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); } + void appendSequence(const DWARFDebugLine::Sequence &sequence) { + Sequences.push_back(sequence); + } void clear() { Prologue.clear(); Rows.clear(); + Sequences.clear(); } - uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const; + // Returns the index of the row with file/line info for a given address, + // or -1 if there is no such row. 
+ uint32_t lookupAddress(uint64_t address) const; void dump(raw_ostream &OS) const; struct Prologue Prologue; - std::vector Rows; + typedef std::vector RowVector; + typedef RowVector::const_iterator RowIter; + typedef std::vector SequenceVector; + typedef SequenceVector::const_iterator SequenceIter; + RowVector Rows; + SequenceVector Sequences; }; - struct State : public Row, public LineTable { + struct State : public Row, public Sequence, public LineTable { // Special row codes. enum { StartParsingLineTable = 0, @@ -150,8 +196,11 @@ public: virtual ~State(); virtual void appendRowToMatrix(uint32_t offset); - virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; } - virtual void reset() { Row::reset(Prologue.DefaultIsStmt); } + virtual void finalize(); + virtual void reset() { + Row::reset(Prologue.DefaultIsStmt); + Sequence::reset(); + } // The row number that starts at zero for the prologue, and increases for // each row added to the matrix. @@ -161,7 +210,7 @@ public: struct DumpingState : public State { DumpingState(raw_ostream &OS) : OS(OS) {} virtual ~DumpingState(); - virtual void finalize(uint32_t offset); + virtual void finalize(); private: raw_ostream &OS; }; diff --git a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 new file mode 100755 index 0000000000000000000000000000000000000000..884870859d981c289aefc82c18ac634175eb1df3 GIT binary patch literal 7689 zcmcIpZH!b`89sOJ?lQ2jJA7CRv@#G#Ky>)15ZVgeWmy<-fp&ou1#4dXHg1`qVDfiR)VR32ob1 zS2b~kt`H(sDyad^8(qv4VsjY_6PegIKZv3trKhoDsQ%K`s81?MkAS+G! 
zt*NX{*Bd)h(|q+D{7foi@!^E|T$nAve=+{u_!9?PD`LauNwpT>FS#W5Li`uu|6#>N z)HEw<3_l0D44#vwoswIMbW2hgt89jxv@j8$!G9g%jGH*@gGYcbLyKNFxY$a04F7eS zYrI^6m{9lU=i{h&N=yjU)I)xfU=o~OKgDIX|r z3_Qb8`|{e^cxS{^rd&5^pm%%$=)^lq>?wC^p(zQ@a&&o@g_`P01JKP z?pvKDu+FLe(`Rmt^{#nQNkP7o2&qn>0>LVncZ)e{+H)p-ZTB9i=I=;5&)k}u6PtV8 zsLq*N!U_~(|NKBO-6T;QGmXB3Coz5L{rO-e>i-7_y=x%98%G~pMYZ3bpSmxdaY|fu zfBzkEboXHVtahJXkNU|y{}xh?fW89S2l`XcSx_k}LZY0D!^)XSII9;gh|i(-<@uHW z^RU&=dVf#irP#odu6`I8-Dg2R1-+hAsNY}LpN2l!Mh}%PQ0vl8CN2X)%5x#XQxTRC zOFnDK^2|t)c~=VON12PIaNgoGaMA?k7{470?Kt1vFnZ3#KMvJ;P8ar3BlB4sG0%qy zqqZ77`<>6s#|{3dsR;4cO&~tkBgSqa?G449FAIC!*rQXm%zk>$$X_w~e_47H=KoH( zF58KwO}k$*l=Ihtfq|!z8xI^gu_^hP)XvlsN(NonnPHdM3xK^q*@;5`3xGp^dtrQ- z|5QD{BLlxfM#dWa+m)E(E5}(0HA5FUxPZA%M#Xe{&Ou^hXgLC`qoXrl3NYh?kEx@$ z#>2$Yl2(ajhz*Nz+r&4ab)75{?*u1_*$ZDXRZC*rFtM}L+0e7R+wEE1v(y=OPIM*E z(NpPGPHWm1`pQ0$^&>{;g!dfjn4V!+tz%MZmC0Q)Z@W*-uf-Dx6*`vzSNjO66N z=FMg>huexRL2+_Rc62;n8Y^$9=FUy!N||uafqjNf#ad68+Y?jg4e&^%lN9tID{8$Z}PLO9ETg9i2b^fU9&;CAFtJnEB`Jj#^$X~2s z(+Tn%`mO5E{$V>v*{AiM3^~?^F&}2HQq>E^z#jz-Dj}U9H{n+&R4RL+1e1^k6_pw< zO{J!Dm1@3RY7ji=D!GC$4u+d71S+MsyQv^I172@>Q@IImtl}4Q-b5BYlvFAclq*#v z3dz|Fl!)aQ^BE*6!yT8Dj;Z&vDQsuqR4x{Cn59zL-00M}=U4pFIEJthWnh!{M@K8U zX-b8BDMua+CR->YSs2PhvU~V{l~=<-ZaUXtuJfd66Oej7BHS0OgbVaf-DMV9 zSzg74{@f320!AHJ#eD@S_YtC#V#0+2uG{dJHJ$mnj+3r3mekvH5A?D}AkKXPY0~&h z9VIvGZ(wy8jPwQlxi29dF^1IBpY4C%^5;H>l=~v_m%WYc?*o&1$!aTw`#4hO3AJHe z$qSXeXw-ki+*nfPrw{LY!T{rzKhHx*H%jnGp&y|lm}q|PJ4qwQDcXhk>0dSe?B90h z6|}egzh?QrXJ$asFVoRb_CMjP#$U#V6rRiQyvA;??f*^qNc-tO+m(c*)U3Io5A**n zY@_~j#(&QE+xcwy2k;kD=I1_?=SDmqqMrV&*H5hce6LO$Lwo$upY-RJKi>;RjKN0e zqlJE?zk)K_e(sMi8w1)ghAsaF3c=Vup1;f)L$|4k?)|5h3pO8=!6F1Auge;47k z!d!l6^3O&=@bn{my9Fk?wdO_WA2c-7_WuiT8xzyz=0#4%N3__s65qze{+Gu8M^Wb% zwjUVG?!OcI359(tfjO>wW5vxBj!bTBpMXF?JxJ2V<&1lZT#0+ z4wk6naqf!x-N4~Uw$SnY1K z2;`zBT(=wTI~3RBM!d83UfzgzDXyQ5_yWasu@P@^t_cGdD<;>mM*BszbNfcT`Mg&2 zE>20ft~A;&QCue)@t)c`(1`b9UQb3L&AT|gjZx$HwLDzPY%HyRt@|t7JFYTwHzP)(U^T36xYlCr0+t>S{JvZ(oCSXdt4>mc8*pdDJg>`n}fkr)Cu1Stkp*AeT+;*tKJ; 
z5`~w~&WN;Kg|EihsbcX0^9&x@r_<~o?ms-}QIa&|>sjGl329JBk`}YnFO0~1qvNz$EvITs43fo}f?fd+Z{=)|c