From 527d783837d7b9759d6c88057e26df606e3c45a9 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 21 Jan 2014 00:23:17 +0000 Subject: [PATCH] Tweak the MCExternalSymbolizer to not use the SymbolLookUp() call back to not guess at a symbol name in some cases. The problem is that in object files assembled starting at address 0, when trying to symbolicate something that starts like this: % cat x.s _t1: vpshufd $0x0, %xmm1, %xmm0 the symbolic disassembly can end up like this: % otool -tV x.o x.o: (__TEXT,__text) section _t1: 0000000000000000 vpshufd $_t1, %xmm1, %xmm0 Which is in this case produced incorrect symbolication. But it is useful in some cases to use the SymbolLookUp() call back to guess at some immediate values. For example one like this that does not have an external relocation entry: % cat y.s _t1: movl $_d1, %eax .data _d1: .long 0 % clang -c -arch i386 y.s % otool -tV y.o y.o: (__TEXT,__text) section _t1: 0000000000000000 movl $_d1, %eax % otool -rv y.o y.o: Relocation information (__TEXT,__text) 1 entries address pcrel length extern type scattered symbolnum/value 00000001 False long False VANILLA False 2 (__DATA,__data) So the change is based on it is not likely that an immediate Value coming from an instruction field of a width of 1 byte, other than branches and items with relocation, are not likely symbol addresses. With the change the first case above simply becomes: % otool -tV x.o x.o: (__TEXT,__text) section _t1: 0000000000000000 vpshufd $0x0, %xmm1, %xmm0 and the second case continues to work as expected. rdar://14863405 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199698 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCExternalSymbolizer.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/MC/MCExternalSymbolizer.cpp b/lib/MC/MCExternalSymbolizer.cpp index b82229457db..9cd5f6b68ca 100644 --- a/lib/MC/MCExternalSymbolizer.cpp +++ b/lib/MC/MCExternalSymbolizer.cpp @@ -43,8 +43,19 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { // Clear SymbolicOp.Value from above and also all other fields. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - if (!SymbolLookUp) + + // At this point, GetOpInfo() did not find any relocation information about + // this operand and we are left to use the SymbolLookUp() call back to guess + // if the Value is the address of a symbol. In the case this is a branch + // that always makes sense to guess. But in the case of an immediate it is + // a bit more questionable if it is an address of a symbol or some other + // reference. So if the immediate Value comes from a width of 1 byte, + // InstSize, we will not guess it is an address of a symbol. Because in + // object files assembled starting at address 0 this usually leads to + // incorrect symbolication. + if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) return false; + uint64_t ReferenceType; if (IsBranch) ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; -- 2.34.1