From 740a75968a1ffc246c8f54a83cdaffe4d0cb3494 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 12 Jun 2014 23:58:49 +0000 Subject: [PATCH] [DWARF parser] Fix broken address ranges construction. Previous algorithm for constructing [Address ranges]->[Compile Units] mapping was wrong. It somewhat relied on the assumption that address ranges for different compile units may not overlap. It is not so. For example, two compile units may contain the definition of the same linkonce_odr function. These definitions will be merged at link-time, resulting in equivalent .debug_ranges entries for both these units Instead of sorting and merging original address ranges (from .debug_ranges and .debug_aranges), implement a different approach: save endpoints of all ranges, and then use a sweep-line approach to construct the desired mapping. If we find that certain address maps to several compilation units, we just pick any of them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210860 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugAranges.cpp | 84 ++++++++---------- lib/DebugInfo/DWARFDebugAranges.h | 32 ++++--- test/DebugInfo/Inputs/arange-overlap.cc | 26 ++++++ .../Inputs/arange-overlap.elf-x86_64 | Bin 0 -> 9824 bytes test/DebugInfo/llvm-symbolizer.test | 4 + 5 files changed, 86 insertions(+), 60 deletions(-) create mode 100644 test/DebugInfo/Inputs/arange-overlap.cc create mode 100755 test/DebugInfo/Inputs/arange-overlap.elf-x86_64 diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index 2524adc25c1..fe7e46d63ba 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include using namespace llvm; void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { @@ -30,6 +31,7 @@ void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { uint64_t HighPC = Desc.getEndAddress(); appendRange(CUOffset, LowPC, HighPC); } + ParsedCUOffsets.insert(CUOffset); } } @@ -56,69 +58,55 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { } } - sortAndMinimize(); + construct(); } void DWARFDebugAranges::clear() { + Endpoints.clear(); Aranges.clear(); ParsedCUOffsets.clear(); } void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC) { - if (!Aranges.empty()) { - if (Aranges.back().CUOffset == CUOffset && - Aranges.back().HighPC() == LowPC) { - Aranges.back().setHighPC(HighPC); - return; - } - } - Aranges.push_back(Range(LowPC, HighPC, CUOffset)); -} - -void DWARFDebugAranges::sortAndMinimize() { - const size_t orig_arange_size = Aranges.size(); - // Size of one? If so, no sorting is needed - if (orig_arange_size <= 1) + if (LowPC >= HighPC) return; - // Sort our address range entries - std::stable_sort(Aranges.begin(), Aranges.end()); - - // Most address ranges are contiguous from function to function - // so our new ranges will likely be smaller. We calculate the size - // of the new ranges since although std::vector objects can be resized, - // the will never reduce their allocated block size and free any excesss - // memory, so we might as well start a brand new collection so it is as - // small as possible. - - // First calculate the size of the new minimal arange vector - // so we don't have to do a bunch of re-allocations as we - // copy the new minimal stuff over to the new collection. - size_t minimal_size = 1; - for (size_t i = 1; i < orig_arange_size; ++i) { - if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i])) - ++minimal_size; - } + Endpoints.emplace_back(LowPC, CUOffset, true); + Endpoints.emplace_back(HighPC, CUOffset, false); +} - // Else, make a new RangeColl that _only_ contains what we need. - RangeColl minimal_aranges; - minimal_aranges.resize(minimal_size); - uint32_t j = 0; - minimal_aranges[j] = Aranges[0]; - for (size_t i = 1; i < orig_arange_size; ++i) { - if (Range::SortedOverlapCheck(minimal_aranges[j], Aranges[i])) { - minimal_aranges[j].setHighPC(Aranges[i].HighPC()); +void DWARFDebugAranges::construct() { + std::multiset ValidCUs; // Maintain the set of CUs describing + // a current address range. + std::sort(Endpoints.begin(), Endpoints.end()); + uint64_t PrevAddress = -1ULL; + for (const auto &E : Endpoints) { + if (PrevAddress < E.Address && ValidCUs.size() > 0) { + // If the address range between two endpoints is described by some + // CU, first try to extend the last range in Aranges. If we can't + // do it, start a new range. + if (!Aranges.empty() && Aranges.back().HighPC() == PrevAddress && + ValidCUs.find(Aranges.back().CUOffset) != ValidCUs.end()) { + Aranges.back().setHighPC(E.Address); + } else { + Aranges.emplace_back(PrevAddress, E.Address, *ValidCUs.begin()); + } + } + // Update the set of valid CUs. + if (E.IsRangeStart) { + ValidCUs.insert(E.CUOffset); } else { - // Only increment j if we aren't merging - minimal_aranges[++j] = Aranges[i]; + auto CUPos = ValidCUs.find(E.CUOffset); + assert(CUPos != ValidCUs.end()); + ValidCUs.erase(CUPos); } + PrevAddress = E.Address; } - assert(j+1 == minimal_size); + assert(ValidCUs.empty()); - // Now swap our new minimal aranges into place. The local - // minimal_aranges will then contian the old big collection - // which will get freed. - minimal_aranges.swap(Aranges); + // Endpoints are not needed now. + std::vector EmptyEndpoints; + EmptyEndpoints.swap(Endpoints); } uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const { diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h index de96d7fb819..a9f37fe772c 100644 --- a/lib/DebugInfo/DWARFDebugAranges.h +++ b/lib/DebugInfo/DWARFDebugAranges.h @@ -27,9 +27,9 @@ private: void clear(); void extract(DataExtractor DebugArangesData); - // Use appendRange multiple times and then call sortAndMinimize. + // Call appendRange multiple times and then call construct. void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); - void sortAndMinimize(); + void construct(); struct Range { explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL, @@ -47,31 +47,39 @@ private: return LowPC + Length; return -1ULL; } + bool containsAddress(uint64_t Address) const { return LowPC <= Address && Address < HighPC(); } - - bool operator <(const Range &other) const { + bool operator<(const Range &other) const { return LowPC < other.LowPC; } - static bool SortedOverlapCheck(const Range &Left, const Range &Right) { - if (Left.CUOffset != Right.CUOffset) - return false; - return Left.HighPC() >= Right.LowPC; - } - uint64_t LowPC; // Start of address range. uint32_t Length; // End of address range (not including this address). uint32_t CUOffset; // Offset of the compile unit or die. }; + struct RangeEndpoint { + uint64_t Address; + uint32_t CUOffset; + bool IsRangeStart; + + RangeEndpoint(uint64_t Address, uint32_t CUOffset, bool IsRangeStart) + : Address(Address), CUOffset(CUOffset), IsRangeStart(IsRangeStart) {} + + bool operator<(const RangeEndpoint &Other) const { + return Address < Other.Address; + } + }; + + typedef std::vector RangeColl; typedef RangeColl::const_iterator RangeCollIterator; - typedef DenseSet ParsedCUOffsetColl; + std::vector Endpoints; RangeColl Aranges; - ParsedCUOffsetColl ParsedCUOffsets; + DenseSet ParsedCUOffsets; }; } diff --git a/test/DebugInfo/Inputs/arange-overlap.cc b/test/DebugInfo/Inputs/arange-overlap.cc new file mode 100644 index 00000000000..82e3f120efd --- /dev/null +++ b/test/DebugInfo/Inputs/arange-overlap.cc @@ -0,0 +1,26 @@ +void call(); + +struct S { + static void foo() { call(); call(); } + static void bar() { call(); call(); } + static void baz() {} +}; + +#ifdef FILE1 +# define FUNC_NAME func1 +# define FUNC_BODY \ + S::foo(); S::bar(); S::baz(); +#else +# define FUNC_NAME func2 +# define FUNC_BODY \ + S::bar(); +#endif + +void FUNC_NAME() { + FUNC_BODY +} + +// Build instructions: +// $ clang -g -fPIC -c -DFILE1 arange-overlap.cc -o obj1.o +// $ clang -g -fPIC -c arange-overlap.cc -o obj2.o +// $ clang -shared obj1.o obj2.o -o diff --git a/test/DebugInfo/Inputs/arange-overlap.elf-x86_64 b/test/DebugInfo/Inputs/arange-overlap.elf-x86_64 new file mode 100755 index 0000000000000000000000000000000000000000..075e9c27123136eb74a613179a88dadb87e85a20 GIT binary patch literal 9824 zcmeHNYitzP6~41$!-8RLOaSvp*$|LSq723k)FC0PF~$pZ2*p6jtHZ8$ZEv*RCHrs; zeZ(y!B|%h4A1ZCr)Jh{IRiws>`j*-{B8~XbAE;4Vsx%c+LTZ#$g;tGHrDgk_JLk;K z&aPACN7O&u)yz5PJCA$j&dhzx-iLbnwg(JDNio&!N=vgGB*Kc8#bTjUM0Kj!IImJG zwQYS_RmD9SbReNhg*8BRP!+8qErNzMBQzLr@D`@xMfl4i z8P@yhYUrp4C0i=HFBB5QkImL%6}`=Yl~D2fB>cDu+2VtVY&&~PT-pCx{8*nt@ z*xK{Eca}ZB<4W_RJFfivjjw$6dndkm<*l9X@ffcWS{_N!C(A%&6jqd;)W+TGvUUQ9hqt~xl%~Qi1 zpKNup?V*uu#?BXFxq@vgJC#WllsyQdY6i`cZJZ*7tYBW4WF(f5hbnvsumGJP=E#$w0fckE(PkK49gu)<$I~6R|=J zmB#aVCmleUgzD|vv9;T7x7w`@k~3%kPhj~3Zy=eP;^G8ksyBL~{?~KDD*EtLAy7K2t9@L`zVXdLIH>5O7rOTL z7vs^Ro1cTND$I+X*!&l8)8C>fC$Ig-?6B%Px#mp}#Csi@zO`>eUwYq+9(~)0-f}J< zZd|#IE^5k%K3e|^=(|eSybRM9!9t(A@JjttV2yja?!E8yVB^YTNRjL}Pave)hXe}C zV17NBBh%1*(UY4WfvR+#+U35}PR3+6y#y&u|NRP1uj3>tZJ_l(A=QgorR8Tye%h72 z4z8X-lKZ*Qre`6QqwkdpOL6_LL1C z)Hj~_PpNbeRE3(hhgx^v6>54U&^@2>p+fou z4$|v)v4)=R(9{R$pMic7dL6Td|I5%%K;HnWe0qYema8prwFR!Wz||J`|F-~!9$h38 zzbrTpPimZjc}!&;FrQ_a$Nuw5iDo_O5EGA47TA8;H!c7ShTS>}Gz zJin7U+!*U++>|-o62_ajV!>Te!7-ecQ)7Z&|BuK_1Snc^=$1aP)856ptNI4>&B= z8|s*#IrS;9dYuBQ)b%(5iX0!t!5&z-xMuBum3kJ3)9OKeH|o^28dqu^Qz3ygIKwc_ z+d!kl*BNFX=mFyE4f9^mVd7^Qs?NJ)Gxlcy)5B#fH0r&JLSsK=QSV(>8vAc2$h!nJ z_U{CwzJQ}n-Ho$1RYED%-8iItxKzEXQDgt-5#%@yO0_yKWh^k(gcjDDp{6CvL;IF2 zK%)f~8fK`WVN=7cc+X5reJK($LnPg{bj#9qV0D&SU^LH)w741vuNS;`AyxR1MgAP3 z2UIK7VZ0xjkF)-kh13ld%i#e`-ju~7n2sH?1_oe-GAAH6&}=s5e&Rv^OHl);+`L6| zcLwG;kTlZbG9))|2_SN$C-XxmAh$&)aF-+*+SA9$`q}5F_li-lu9Tq z4af4s%1Rv0z|*0HoTK4htU|D%%O%q>a$xRIx}Yrm0mdpM$H42)FjjKd9?Zo?lJ;-{ zJ|Zb=ppebwQISLLAAl0EVk4;mRFs7~PL__TzpPmJumY#-$Vf5+Wg;0b4%vr_ad_ZM z4$J!PGark`bIDQBq*Ixs@Ms8O5ukARP&t5pY4TOA`C)Q2iEu*tiD3>d*f%_LuwT|Nnqy8s6gfKLsV_FaAH} z?oQ~NUT5&q{sKP@hWzFJ%Ks|zzk>gI{r=zg`FDoGK$Pm_3;$pKld$#HA7%e2`}^zj z%RhoY-73jn=CAU<&o~7~k#-Y*$?KQUdFz+&!)F_{o4a0cq;mU7oYOvkSx-!}e?%Oa zil5Nm`21!5+{y`xorLkrzlDM@xqkUSqmBLLI%o3#0~B=qZDPPw{Cj;kH>dU?)p;C( zku2pwt~en8Q@`yc;C_ruUt|9^pVB9)f6w6G&`eR3`k_PIv)`Y-qWdv2{uTTG$@O#? zzwKi7_qX4-X+DecWrw(D@fUg>`@06rtmOr$Kd$(R{rVaF?_&RPF=XoZUjSU59k1yh z*?)`A!6)jrlfK96f9hk7)WZGfbr3sn$nz`xI<1py)jzq~@t^cn=o1~I8O|oMllg?# z#6)i?^Mgv*)M zSKqH5;8bteSMNRAzTD38Uf$1mXHytl`F$S)PjM!Eac0W@80;zkHs5pggvLYalyoGJ zseBiQQR8`|1t-RhfDIK_9D->m5eUTeR@ z0ml9BT>^Nm{Rz8(F0X(xoetwP$_?NKGN_E0(-kELzB`nF@mF@=@&p>(p4OjsK?bgWmT zuv3XKk~);LfKL=hMh;6oJ-fE)YPRp`x}(PyiK-F4T#H`n>sqQm+;F09w)XYl3h$Qf7`*j@w{Ylf8_o_8cb|rT ln}~O}2kp|R*tSvL6jE!;NX4#_NW{CpgLj|k;LgXV{{o^4;EMnN literal 0 HcmV?d00001 diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test index 1ddfd9cf55c..20d3dda21ab 100644 --- a/test/DebugInfo/llvm-symbolizer.test +++ b/test/DebugInfo/llvm-symbolizer.test @@ -18,6 +18,7 @@ RUN: echo "%p/Inputs/macho-universal:i386 0x1f67" >> %t.input RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input RUN: echo "%p/Inputs/llvm-symbolizer-dwo-test 0x400514" >> %t.input RUN: echo "%p/Inputs/fission-ranges.elf-x86_64 0x720" >> %t.input +RUN: echo "%p/Inputs/arange-overlap.elf-x86_64 0x714" >> %t.input RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \ RUN: --default-arch=i386 < %t.input | FileCheck %s @@ -94,6 +95,9 @@ CHECK-NEXT: llvm-symbolizer-dwo-test.cc:11 CHECK: main CHECK-NEXT: {{.*}}fission-ranges.cc:6 +CHECK: _ZN1S3bazEv +CHECK-NEXT: {{.*}}arange-overlap.cc:6 + RUN: echo "unexisting-file 0x1234" > %t.input2 RUN: llvm-symbolizer < %t.input2 -- 2.34.1