From a02c32f31dd9da7bd1c5c72858c40568a3bd45df Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 11 Apr 2014 20:07:58 +0000 Subject: [PATCH] Remove redundant symbolization support from MCDisassembler interface. MCDisassembler has an MCSymbolizer member that is meant to take care of symbolizing during disassembly, but it also has several methods that enable the disassembler to do symbolization internally (i.e. without an attached symbolizer object). There is no need for this duplication, but ARM64 had been making use of it. This patch moves the ARM64 symbolization logic out of ARM64Disassembler and into an ARM64ExternalSymbolizer class, and removes the duplicated MCSymbolizer functionality from the MCDisassembler interface. Symbolization will now be done exclusively through MCSymbolizers. There should be no impact on disassembly for any platform, but this allows us to tidy up the MCDisassembler interface and simplify the process of (and invariants related to) disassembler setup. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206063 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCDisassembler.h | 30 +-- include/llvm/MC/MCExternalSymbolizer.h | 2 +- lib/MC/MCDisassembler.cpp | 14 -- lib/MC/MCDisassembler/Disassembler.cpp | 3 +- .../ARM64/Disassembler/ARM64Disassembler.cpp | 225 ++--------------- .../ARM64/Disassembler/ARM64Disassembler.h | 14 -- .../Disassembler/ARM64ExternalSymbolizer.cpp | 226 ++++++++++++++++++ .../Disassembler/ARM64ExternalSymbolizer.h | 37 +++ lib/Target/ARM64/Disassembler/CMakeLists.txt | 1 + 9 files changed, 287 insertions(+), 265 deletions(-) create mode 100644 lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp create mode 100644 lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h index d545fc7e4ed..410d6d36f79 100644 --- a/include/llvm/MC/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler.h @@ -57,8 +57,7 @@ public: /// 
Constructor - Performs initial setup for the disassembler. MCDisassembler(const MCSubtargetInfo &STI) - : GetOpInfo(0), SymbolLookUp(0), DisInfo(0), Ctx(0), STI(STI), - Symbolizer(), CommentStream(0) {} + : STI(STI), Symbolizer(), CommentStream(0) {} virtual ~MCDisassembler(); @@ -84,19 +83,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const = 0; -private: - // - // Hooks for symbolic disassembly via the public 'C' interface. - // - // The function to get the symbolic information for operands. - LLVMOpInfoCallback GetOpInfo; - // The function to lookup a symbol name. - LLVMSymbolLookupCallback SymbolLookUp; - // The pointer to the block of symbolic information for above call back. - void *DisInfo; - // The assembly context for creating symbols and MCExprs in place of - // immediate operands when there is symbolic information. - MCContext *Ctx; protected: // Subtarget information, for instruction decoding predicates if required. @@ -116,20 +102,6 @@ public: /// This takes ownership of \p Symzer, and deletes the previously set one. void setSymbolizer(std::unique_ptr Symzer); - /// Sets up an external symbolizer that uses the C API callbacks. 
- void setupForSymbolicDisassembly(LLVMOpInfoCallback GetOpInfo, - LLVMSymbolLookupCallback SymbolLookUp, - void *DisInfo, - MCContext *Ctx, - std::unique_ptr &RelInfo); - - LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; } - LLVMSymbolLookupCallback getLLVMSymbolLookupCallback() const { - return SymbolLookUp; - } - void *getDisInfoBlock() const { return DisInfo; } - MCContext *getMCContext() const { return Ctx; } - const MCSubtargetInfo& getSubtargetInfo() const { return STI; } // Marked mutable because we cache it inside the disassembler, rather than diff --git a/include/llvm/MC/MCExternalSymbolizer.h b/include/llvm/MC/MCExternalSymbolizer.h index cab915234f3..2c7d23707c9 100644 --- a/include/llvm/MC/MCExternalSymbolizer.h +++ b/include/llvm/MC/MCExternalSymbolizer.h @@ -26,7 +26,7 @@ namespace llvm { /// /// See llvm-c/Disassembler.h. class MCExternalSymbolizer : public MCSymbolizer { - +protected: /// \name Hooks for symbolic disassembly via the public 'C' interface. /// @{ /// The function to get the symbolic information for operands. 
diff --git a/lib/MC/MCDisassembler.cpp b/lib/MC/MCDisassembler.cpp index 7a2b1a14a5b..77d9ce16754 100644 --- a/lib/MC/MCDisassembler.cpp +++ b/lib/MC/MCDisassembler.cpp @@ -16,20 +16,6 @@ using namespace llvm; MCDisassembler::~MCDisassembler() { } -void MCDisassembler::setupForSymbolicDisassembly( - LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, - void *DisInfo, MCContext *Ctx, std::unique_ptr &RelInfo) { - this->GetOpInfo = GetOpInfo; - this->SymbolLookUp = SymbolLookUp; - this->DisInfo = DisInfo; - this->Ctx = Ctx; - assert(Ctx != 0 && "No MCContext given for symbolic disassembly"); - if (!Symbolizer) - Symbolizer.reset(new MCExternalSymbolizer(*Ctx, std::move(RelInfo), - GetOpInfo, SymbolLookUp, - DisInfo)); -} - bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index b935b839bd7..b57b8aaa113 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -82,8 +82,7 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, std::unique_ptr Symbolizer(TheTarget->createMCSymbolizer( Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, RelInfo.release())); DisAsm->setSymbolizer(std::move(Symbolizer)); - DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, - Ctx, RelInfo); + // Set up the instruction printer. 
int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp index 8f9b79c90a8..34cc1b630c3 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp @@ -13,19 +13,16 @@ #define DEBUG_TYPE "arm64-disassembler" #include "ARM64Disassembler.h" +#include "ARM64ExternalSymbolizer.h" #include "ARM64Subtarget.h" #include "MCTargetDesc/ARM64AddressingModes.h" #include "Utils/ARM64BaseInfo.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" // Pull DecodeStatus and its enum values into the global namespace. 
typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; @@ -219,205 +216,23 @@ DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, return Success; } -static MCSymbolRefExpr::VariantKind -getVariant(uint64_t LLVMDisassembler_VariantKind) { - switch (LLVMDisassembler_VariantKind) { - case LLVMDisassembler_VariantKind_None: - return MCSymbolRefExpr::VK_None; - case LLVMDisassembler_VariantKind_ARM64_PAGE: - return MCSymbolRefExpr::VK_PAGE; - case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: - return MCSymbolRefExpr::VK_PAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: - return MCSymbolRefExpr::VK_GOTPAGE; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: - return MCSymbolRefExpr::VK_GOTPAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_TLVP: - case LLVMDisassembler_VariantKind_ARM64_TLVOFF: - default: - assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; - } -} - -/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic -/// operand in place of the immediate Value in the MCInst. The immediate -/// Value has not had any PC adjustment made by the caller. If the instruction -/// is a branch that adds the PC to the immediate Value then isBranch is -/// Success, else Fail. If the getOpInfo() function was set as part of the -/// setupForSymbolicDisassembly() call then that function is called to get any -/// symbolic information at the Address for this instrution. If that returns -/// non-zero then the symbolic information it returns is used to create an -/// MCExpr and that is added as an operand to the MCInst. If getOpInfo() -/// returns zero and isBranch is Success then a symbol look up for -/// Address + Value is done and if a symbol is found an MCExpr is created with -/// that, else an MCExpr with Address + Value is created. 
If getOpInfo() -/// returns zero and isBranch is Fail then the the Opcode of the MCInst is -/// tested and for ADRP an other instructions that help to load of pointers -/// a symbol look up is done to see it is returns a specific reference type -/// to add to the comment stream. This function returns Success if it adds -/// an operand to the MCInst and Fail otherwise. -bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value, - bool isBranch, - uint64_t InstSize, MCInst &MI, - uint32_t insn) const { - LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback(); - - struct LLVMOpInfo1 SymbolicOp; - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - SymbolicOp.Value = Value; - void *DisInfo = getDisInfoBlock(); - uint64_t ReferenceType; - const char *ReferenceName; - const char *Name; - LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback(); - if (!getOpInfo || - !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = Success; - SymbolicOp.Value = 0; - } else { - SymbolicOp.Value = Address + Value; - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*CommentStream) << "symbol stub for: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADRP) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - (*CommentStream) << format("0x%llx", - 0xfffffffffffff000LL & (Address + Value)); - } else { - return false; - } - } else if (MI.getOpcode() 
== ARM64::ADDXri || - MI.getOpcode() == ARM64::LDRXui || - MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) { - if (SymbolLookUp) { - if (MI.getOpcode() == ARM64::ADDXri) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; - else if (MI.getOpcode() == ARM64::LDRXui) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; - if (MI.getOpcode() == ARM64::LDRXl) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else if (MI.getOpcode() == ARM64::ADR) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else { - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) - (*CommentStream) << "literal pool symbol address: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) - (*CommentStream) << "literal pool for: \"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) - (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) - (*CommentStream) << "Objc message ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) - (*CommentStream) << "Objc selector ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) - (*CommentStream) << "Objc class ref: " << ReferenceName; - // For these instructions, the SymbolLookUp() above is just to get the - // ReferenceType and ReferenceName. 
We want to make sure not to - // fall through so we don't build an MCExpr to leave the disassembly - // of the immediate values of these instructions to the InstPrinter. - return false; - } else { - return false; - } - } else { - return false; - } - } - - MCContext *Ctx = getMCContext(); - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); - if (Variant != MCSymbolRefExpr::VK_None) - Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx); - else - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - MI.addOperand(MCOperand::CreateExpr(Expr)); - - return true; +MCSymbolizer *createARM64ExternalSymbolizer( + StringRef TT, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo, MCContext *Ctx, + MCRelocationInfo *RelInfo) { + return new 
llvm::ARM64ExternalSymbolizer( + *Ctx, + std::unique_ptr(RelInfo), + GetOpInfo, SymbolLookUp, DisInfo); } extern "C" void LLVMInitializeARM64Disassembler() { TargetRegistry::RegisterMCDisassembler(TheARM64Target, createARM64Disassembler); + TargetRegistry::RegisterMCSymbolizer(TheARM64Target, + createARM64ExternalSymbolizer); } static const unsigned FPR128DecoderTable[] = { @@ -773,8 +588,8 @@ static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, if (ImmVal & (1 << (19 - 1))) ImmVal |= ~((1LL << 19) - 1); - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2, - Inst.getOpcode() != ARM64::LDRXl, 4, Inst)) + if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr, + Inst.getOpcode() != ARM64::LDRXl, 0, 4)) Inst.addOperand(MCOperand::CreateImm(ImmVal)); return Success; } @@ -1023,7 +838,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, } DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn)) + if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4)) Inst.addOperand(MCOperand::CreateImm(offset)); return Success; } @@ -1535,7 +1350,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, imm |= ~((1LL << 21) - 1); DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn)) + if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4)) Inst.addOperand(MCOperand::CreateImm(imm)); return Success; @@ -1571,7 +1386,7 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); } - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn)) + if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4)) Inst.addOperand(MCOperand::CreateImm(ImmVal)); Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal)); return Success; @@ -1588,7 +1403,7 @@ static DecodeStatus 
DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, if (imm & (1 << (26 - 1))) imm |= ~((1LL << 26) - 1); - if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst)) + if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4)) Inst.addOperand(MCOperand::CreateImm(imm)); return Success; @@ -1627,7 +1442,7 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); Inst.addOperand(MCOperand::CreateImm(bit)); - if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst)) + if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4)) Inst.addOperand(MCOperand::CreateImm(dst)); return Success; diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/lib/Target/ARM64/Disassembler/ARM64Disassembler.h index 35efc8de42a..95848d55fa4 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h +++ b/lib/Target/ARM64/Disassembler/ARM64Disassembler.h @@ -33,20 +33,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const; - - /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic - /// operand in place of the immediate Value in the MCInst. The immediate - /// Value has not had any PC adjustment made by the caller. If the instruction - /// adds the PC to the immediate Value then InstsAddsAddressToValue is true, - /// else false. If the getOpInfo() function was set as part of the - /// setupForSymbolicDisassembly() call then that function is called to get any - /// symbolic information at the Address for this instrution. If that returns - /// non-zero then the symbolic information it returns is used to create an - /// MCExpr and that is added as an operand to the MCInst. This function - /// returns true if it adds an operand to the MCInst and false otherwise. 
- bool tryAddingSymbolicOperand(uint64_t Address, int Value, - bool InstsAddsAddressToValue, uint64_t InstSize, - MCInst &MI, uint32_t insn = 0) const; }; } // namespace llvm diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp b/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp new file mode 100644 index 00000000000..65d4220488c --- /dev/null +++ b/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp @@ -0,0 +1,226 @@ +//===- ARM64ExternalSymbolizer.cpp - Symbolizer for ARM64 -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm64-disassembler" + +#include "ARM64ExternalSymbolizer.h" +#include "ARM64Subtarget.h" +#include "MCTargetDesc/ARM64AddressingModes.h" +#include "Utils/ARM64BaseInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static MCSymbolRefExpr::VariantKind +getVariant(uint64_t LLVMDisassembler_VariantKind) { + switch (LLVMDisassembler_VariantKind) { + case LLVMDisassembler_VariantKind_None: + return MCSymbolRefExpr::VK_None; + case LLVMDisassembler_VariantKind_ARM64_PAGE: + return MCSymbolRefExpr::VK_PAGE; + case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: + return MCSymbolRefExpr::VK_PAGEOFF; + case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: + return MCSymbolRefExpr::VK_GOTPAGE; + case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: + return MCSymbolRefExpr::VK_GOTPAGEOFF; + case LLVMDisassembler_VariantKind_ARM64_TLVP: + case LLVMDisassembler_VariantKind_ARM64_TLVOFF: + default: + assert(0 && "bad LLVMDisassembler_VariantKind"); + return MCSymbolRefExpr::VK_None; + } +} + +/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys 
to add a symbolic +/// operand in place of the immediate Value in the MCInst. The immediate +/// Value has not had any PC adjustment made by the caller. If the instruction +/// is a branch that adds the PC to the immediate Value then IsBranch is +/// true, else false. If GetOpInfo is non-null, then it is called to get any +/// symbolic information at the Address for this instruction. If that returns +/// non-zero then the symbolic information it returns is used to create an +/// MCExpr and that is added as an operand to the MCInst. If GetOpInfo() +/// returns zero and IsBranch is true then a symbol look up for +/// Address + Value is done and if a symbol is found an MCExpr is created with +/// that, else an MCExpr with Address + Value is created. If GetOpInfo() +/// returns zero and IsBranch is false then the Opcode of the MCInst is +/// tested and for ADRP and other instructions that help to load pointers, +/// a symbol look up is done to see if it returns a specific reference type +/// to add to the comment stream. This function returns true if it adds +/// an operand to the MCInst and false otherwise. +bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( + MCInst &MI, + raw_ostream &CommentStream, + int64_t Value, + uint64_t Address, + bool IsBranch, + uint64_t Offset, + uint64_t InstSize) { + // FIXME: This method shares a lot of code with + // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible + // to refactor the MCExternalSymbolizer interface to allow more of this + // implementation to be shared. 
+ // + struct LLVMOpInfo1 SymbolicOp; + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + SymbolicOp.Value = Value; + uint64_t ReferenceType; + const char *ReferenceName; + if (!GetOpInfo || + !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + if (IsBranch) { + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, + Address, &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; + SymbolicOp.Value = 0; + } else { + SymbolicOp.Value = Address + Value; + } + if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + CommentStream << "symbol stub for: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message) + CommentStream << "Objc message: " << ReferenceName; + } else if (MI.getOpcode() == ARM64::ADRP) { + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; + // otool expects the fully encoded ADRP instruction to be passed in as + // the value here, so reconstruct it: + const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); + uint32_t EncodedInst = 0x90000000; + EncodedInst |= (Value & 0x3) << 29; // immlo + EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi + EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg + SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, + &ReferenceName); + CommentStream << format("0x%llx", + 0xfffffffffffff000LL & (Address + Value)); + } else if (MI.getOpcode() == ARM64::ADDXri || + MI.getOpcode() == ARM64::LDRXui || + MI.getOpcode() == ARM64::LDRXl || + MI.getOpcode() == ARM64::ADR) { + if (MI.getOpcode() == ARM64::ADDXri) + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; + else if (MI.getOpcode() == ARM64::LDRXui) + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; + if (MI.getOpcode() == ARM64::LDRXl) { + ReferenceType = 
LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; + SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, + &ReferenceName); + } else if (MI.getOpcode() == ARM64::ADR) { + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; + SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, + &ReferenceName); + } else { + const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); + // otool expects the fully encoded ADD/LDR instruction to be passed in + // as the value here, so reconstruct it: + unsigned EncodedInst = + MI.getOpcode() == ARM64::ADDXri ? 0x91000000: 0xF9400000; + EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] + EncodedInst |= + MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn + EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd + + SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, + &ReferenceName); + } + if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) + CommentStream << "literal pool symbol address: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) + CommentStream << "literal pool for: \"" << ReferenceName << "\""; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) + CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message) + CommentStream << "Objc message: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) + CommentStream << "Objc message ref: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) + CommentStream << "Objc selector ref: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) + CommentStream << "Objc class ref: " << ReferenceName; + // For these instructions, the SymbolLookUp() above is just to get the + // ReferenceType and 
ReferenceName. We want to make sure not to + // fall through so we don't build an MCExpr to leave the disassembly + // of the immediate values of these instructions to the InstPrinter. + return false; + } else { + return false; + } + } + + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); + if (Variant != MCSymbolRefExpr::VK_None) + Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx); + else + Add = MCSymbolRefExpr::Create(Sym, Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, Ctx); + } + + MI.addOperand(MCOperand::CreateExpr(Expr)); + + return true; +} diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h b/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h new file mode 100644 index 00000000000..45f07a5e258 --- /dev/null +++ b/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h @@ -0,0 +1,37 @@ +//===- 
ARM64ExternalSymbolizer.h - Symbolizer for ARM64 ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Symbolize ARM64 assembly code during disassembly using callbacks. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM64EXTERNALSYMBOLIZER_H +#define ARM64EXTERNALSYMBOLIZER_H + +#include "llvm/MC/MCExternalSymbolizer.h" + +namespace llvm { + +class ARM64ExternalSymbolizer : public MCExternalSymbolizer { +public: + ARM64ExternalSymbolizer(MCContext &Ctx, + std::unique_ptr RelInfo, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo) + : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp, + DisInfo) {} + + bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize) override; +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/ARM64/Disassembler/CMakeLists.txt b/lib/Target/ARM64/Disassembler/CMakeLists.txt index ad998c28c49..43ade66be14 100644 --- a/lib/Target/ARM64/Disassembler/CMakeLists.txt +++ b/lib/Target/ARM64/Disassembler/CMakeLists.txt @@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMARM64Disassembler ARM64Disassembler.cpp + ARM64ExternalSymbolizer.cpp ) # workaround for hanging compilation on MSVC8, 9 and 10 #if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -- 2.34.1