From: Chris Lattner Date: Tue, 20 Jul 2010 18:25:19 +0000 (+0000) Subject: start straightening out libedis's dependencies and make it fit X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=847da55716e9c1d39c08ed052bc86d28796cb91f;p=oota-llvm.git start straightening out libedis's dependencies and make it fit better in the llvm world. Among other things, this changes: 1. The guts of libedis are now moved into lib/MC/MCDisassembler 2. llvm-mc now depends on lib/MC/MCDisassembler, not tools/edis, so edis and mc don't have to be built in series. 3. lib/MC/MCDisassembler no longer depends on the C api, the C API depends on it. 4. Various code cleanup changes. There is still a lot to be done to make edis fit with the llvm design, but this is an incremental step in the right direction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108869 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/CMakeLists.txt b/CMakeLists.txt index d4f2221c9fd..a4babed29fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -308,6 +308,7 @@ add_subdirectory(lib/Analysis) add_subdirectory(lib/Analysis/IPA) add_subdirectory(lib/MC) add_subdirectory(lib/MC/MCParser) +add_subdirectory(lib/MC/MCDisassembler) add_subdirectory(test) add_subdirectory(utils/FileCheck) diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h index ebb2cf8a82d..d177381988d 100644 --- a/include/llvm-c/EnhancedDisassembly.h +++ b/include/llvm-c/EnhancedDisassembly.h @@ -51,41 +51,38 @@ typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, @typedef EDAssemblySyntax_t An assembly syntax for use in tokenizing instructions. */ -typedef enum { +enum { /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */ kEDAssemblySyntaxX86Intel = 0, /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */ kEDAssemblySyntaxX86ATT = 1, kEDAssemblySyntaxARMUAL = 2 -} EDAssemblySyntax_t; +}; +typedef unsigned EDAssemblySyntax_t; /*! @typedef EDDisassemblerRef Encapsulates a disassembler for a single CPU architecture. */ -struct EDDisassembler; -typedef struct EDDisassembler *EDDisassemblerRef; +typedef void *EDDisassemblerRef; /*! @typedef EDInstRef Encapsulates a single disassembled instruction in one assembly syntax. */ -struct EDInst; -typedef struct EDInst *EDInstRef; +typedef void *EDInstRef; /*! @typedef EDTokenRef Encapsulates a token from the disassembly of an instruction. */ -struct EDToken; -typedef struct EDToken *EDTokenRef; +typedef void *EDTokenRef; /*! @typedef EDOperandRef Encapsulates an operand of an instruction. */ -struct EDOperand; -typedef struct EDOperand *EDOperandRef; +typedef void *EDOperandRef; /*! @functiongroup Getting a disassembler diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp new file mode 100644 index 00000000000..697b3d9c051 --- /dev/null +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -0,0 +1,402 @@ +//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's disassembler class. +// The disassembler is responsible for vending individual instructions according +// to a given architecture and disassembly syntax. +// +//===----------------------------------------------------------------------===// + +#include "EDDisassembler.h" +#include "EDInst.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelect.h" +using namespace llvm; + +bool EDDisassembler::sInitialized = false; +EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; + +struct TripleMap { + Triple::ArchType Arch; + const char *String; +}; + +static struct TripleMap triplemap[] = { + { Triple::x86, "i386-unknown-unknown" }, + { Triple::x86_64, "x86_64-unknown-unknown" }, + { Triple::arm, "arm-unknown-unknown" }, + { Triple::thumb, "thumb-unknown-unknown" }, + { Triple::InvalidArch, NULL, } +}; + +/// infoFromArch - Returns the TripleMap corresponding to a given architecture, +/// or NULL if there is an error +/// +/// @arg arch - The Triple::ArchType for the desired architecture +static const char *tripleFromArch(Triple::ArchType arch) { + unsigned int infoIndex; + + for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) { + if (arch == triplemap[infoIndex].Arch) + return triplemap[infoIndex].String; + } + + return NULL; +} + +/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer +/// for the desired assembly syntax, suitable for passing to +/// Target::createMCInstPrinter() +/// +/// @arg arch - The target architecture +/// @arg syntax - The assembly syntax in sd form +static int getLLVMSyntaxVariant(Triple::ArchType arch, + EDDisassembler::AssemblySyntax syntax) { + switch (syntax) { + default: + return -1; + // Mappings below from X86AsmPrinter.cpp + case EDDisassembler::kEDAssemblySyntaxX86ATT: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 0; + else + return -1; + case EDDisassembler::kEDAssemblySyntaxX86Intel: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 1; + else + return -1; + case EDDisassembler::kEDAssemblySyntaxARMUAL: + if (arch == Triple::arm || arch == Triple::thumb) + return 0; + else + return -1; + } +} + +void EDDisassembler::initialize() { + if (sInitialized) + return; + + sInitialized = true; + + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + InitializeAllDisassemblers(); +} + +#undef BRINGUP_TARGET + +EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, + AssemblySyntax syntax) { + CPUKey key; + key.Arch = arch; + key.Syntax = syntax; + + EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); + + if (i != sDisassemblers.end()) { + return i->second; + } else { + EDDisassembler* sdd = new EDDisassembler(key); + if (!sdd->valid()) { + delete sdd; + return NULL; + } + + sDisassemblers[key] = sdd; + + return sdd; + } + + return NULL; +} + +EDDisassembler *EDDisassembler::getDisassembler(StringRef str, + AssemblySyntax syntax) { + return getDisassembler(Triple(str).getArch(), syntax); +} + +EDDisassembler::EDDisassembler(CPUKey &key) : + Valid(false), + HasSemantics(false), + ErrorStream(nulls()), + Key(key) { + const char *triple = tripleFromArch(key.Arch); + + if (!triple) + return; + + LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); + + if (LLVMSyntaxVariant < 0) + return; + + std::string tripleString(triple); + std::string errorString; + + Tgt = TargetRegistry::lookupTarget(tripleString, + errorString); + + if (!Tgt) + return; + + std::string featureString; + + TargetMachine.reset(Tgt->createTargetMachine(tripleString, + featureString)); + + const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); + + if (!registerInfo) + return; + + initMaps(*registerInfo); + + AsmInfo.reset(Tgt->createAsmInfo(tripleString)); + + if (!AsmInfo) + return; + + Disassembler.reset(Tgt->createMCDisassembler()); + + if (!Disassembler) + return; + + InstInfos = Disassembler->getEDInfo(); + + InstString.reset(new std::string); + InstStream.reset(new raw_string_ostream(*InstString)); + InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); + + if (!InstPrinter) + return; + + GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); + SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); + SpecificAsmLexer->InstallLexer(*GenericAsmLexer); + + initMaps(*TargetMachine->getRegisterInfo()); + + Valid = true; +} + +EDDisassembler::~EDDisassembler() { + if (!valid()) + return; +} + +namespace { + /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback + /// as provided by the sd interface. See MemoryObject. + class EDMemoryObject : public llvm::MemoryObject { + private: + EDByteReaderCallback Callback; + void *Arg; + public: + EDMemoryObject(EDByteReaderCallback callback, + void *arg) : Callback(callback), Arg(arg) { } + ~EDMemoryObject() { } + uint64_t getBase() const { return 0x0; } + uint64_t getExtent() const { return (uint64_t)-1; } + int readByte(uint64_t address, uint8_t *ptr) const { + if (!Callback) + return -1; + + if (Callback(ptr, address, Arg)) + return -1; + + return 0; + } + }; +} + +EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg) { + EDMemoryObject memoryObject(byteReader, arg); + + MCInst* inst = new MCInst; + uint64_t byteSize; + + if (!Disassembler->getInstruction(*inst, + byteSize, + memoryObject, + address, + ErrorStream)) { + delete inst; + return NULL; + } else { + const llvm::EDInstInfo *thisInstInfo; + + thisInstInfo = &InstInfos[inst->getOpcode()]; + + EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); + return sdInst; + } +} + +void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { + unsigned numRegisters = registerInfo.getNumRegs(); + unsigned registerIndex; + + for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { + const char* registerName = registerInfo.get(registerIndex).Name; + + RegVec.push_back(registerName); + RegRMap[registerName] = registerIndex; + } + + switch (Key.Arch) { + default: + break; + case Triple::x86: + case Triple::x86_64: + stackPointers.insert(registerIDWithName("SP")); + stackPointers.insert(registerIDWithName("ESP")); + stackPointers.insert(registerIDWithName("RSP")); + + programCounters.insert(registerIDWithName("IP")); + programCounters.insert(registerIDWithName("EIP")); + programCounters.insert(registerIDWithName("RIP")); + break; + case Triple::arm: + case Triple::thumb: + stackPointers.insert(registerIDWithName("SP")); + + programCounters.insert(registerIDWithName("PC")); + break; + } +} + +const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { + if (registerID >= RegVec.size()) + return NULL; + else + return RegVec[registerID].c_str(); +} + +unsigned EDDisassembler::registerIDWithName(const char *name) const { + regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); + if (iter == RegRMap.end()) + return 0; + else + return (*iter).second; +} + +bool EDDisassembler::registerIsStackPointer(unsigned registerID) { + return (stackPointers.find(registerID) != stackPointers.end()); +} + +bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { + return (programCounters.find(registerID) != programCounters.end()); +} + +int EDDisassembler::printInst(std::string &str, MCInst &inst) { + PrinterMutex.acquire(); + + InstPrinter->printInst(&inst, *InstStream); + InstStream->flush(); + str = *InstString; + InstString->clear(); + + PrinterMutex.release(); + + return 0; +} + +int EDDisassembler::parseInst(SmallVectorImpl &operands, + SmallVectorImpl &tokens, + const std::string &str) { + int ret = 0; + + switch (Key.Arch) { + default: + return -1; + case Triple::x86: + case Triple::x86_64: + case Triple::arm: + case Triple::thumb: + break; + } + + const char *cStr = str.c_str(); + MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); + + StringRef instName; + SMLoc instLoc; + + SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over + MCContext context(*AsmInfo); + OwningPtr streamer(createNullStreamer(context)); + OwningPtr genericParser(createMCAsmParser(*Tgt, sourceMgr, + context, *streamer, + *AsmInfo)); + OwningPtr TargetParser(Tgt->createAsmParser(*genericParser, + *TargetMachine)); + + AsmToken OpcodeToken = genericParser->Lex(); + AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to + + if (OpcodeToken.is(AsmToken::Identifier)) { + instName = OpcodeToken.getString(); + instLoc = OpcodeToken.getLoc(); + + if (NextToken.isNot(AsmToken::Eof) && + TargetParser->ParseInstruction(instName, instLoc, operands)) + ret = -1; + } else { + ret = -1; + } + + ParserMutex.acquire(); + + if (!ret) { + GenericAsmLexer->setBuffer(buf); + + while (SpecificAsmLexer->Lex(), + SpecificAsmLexer->isNot(AsmToken::Eof) && + SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { + if (SpecificAsmLexer->is(AsmToken::Error)) { + ret = -1; + break; + } + tokens.push_back(SpecificAsmLexer->getTok()); + } + } + + ParserMutex.release(); + + return ret; +} + +int EDDisassembler::llvmSyntaxVariant() const { + return LLVMSyntaxVariant; +} diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h new file mode 100644 index 00000000000..0fb853da303 --- /dev/null +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -0,0 +1,271 @@ +//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// disassembler class. The disassembler is responsible for vending individual +// instructions according to a given architecture and disassembly syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDDISASSEMBLER_H +#define LLVM_EDDISASSEMBLER_H + +#include "EDInfo.inc" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" + +#include +#include +#include + +namespace llvm { +class AsmLexer; +class AsmToken; +class MCContext; +class MCAsmInfo; +class MCAsmLexer; +class AsmParser; +class TargetAsmLexer; +class TargetAsmParser; +class MCDisassembler; +class MCInstPrinter; +class MCInst; +class MCParsedAsmOperand; +class MCStreamer; +template class SmallVectorImpl; +class SourceMgr; +class Target; +class TargetMachine; +class TargetRegisterInfo; + +struct EDInstInfo; +struct EDInst; +struct EDOperand; +struct EDToken; + +typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg); + +/// EDDisassembler - Encapsulates a disassembler for a single architecture and +/// disassembly syntax. Also manages the static disassembler registry. +struct EDDisassembler { + typedef enum { + /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */ + kEDAssemblySyntaxX86Intel = 0, + /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */ + kEDAssemblySyntaxX86ATT = 1, + kEDAssemblySyntaxARMUAL = 2 + } AssemblySyntax; + + + //////////////////// + // Static members // + //////////////////// + + /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax + /// pair + struct CPUKey { + /// The architecture type + llvm::Triple::ArchType Arch; + + /// The assembly syntax + AssemblySyntax Syntax; + + /// operator== - Equality operator + bool operator==(const CPUKey &key) const { + return (Arch == key.Arch && + Syntax == key.Syntax); + } + + /// operator< - Less-than operator + bool operator<(const CPUKey &key) const { + if(Arch > key.Arch) + return false; + if(Syntax >= key.Syntax) + return false; + return true; + } + }; + + typedef std::map DisassemblerMap_t; + + /// True if the disassembler registry has been initialized; false if not + static bool sInitialized; + /// A map from disassembler specifications to disassemblers. Populated + /// lazily. + static DisassemblerMap_t sDisassemblers; + + /// getDisassembler - Returns the specified disassemble, or NULL on failure + /// + /// @arg arch - The desired architecture + /// @arg syntax - The desired disassembly syntax + static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch, + AssemblySyntax syntax); + + /// getDisassembler - Returns the disassembler for a given combination of + /// CPU type, CPU subtype, and assembly syntax, or NULL on failure + /// + /// @arg str - The string representation of the architecture triple, e.g., + /// "x86_64-apple-darwin" + /// @arg syntax - The disassembly syntax for the required disassembler + static EDDisassembler *getDisassembler(llvm::StringRef str, + AssemblySyntax syntax); + + /// initialize - Initializes the disassembler registry and the LLVM backend + static void initialize(); + + //////////////////////// + // Per-object members // + //////////////////////// + + /// True only if the object has been successfully initialized + bool Valid; + /// True if the disassembler can provide semantic information + bool HasSemantics; + + /// The stream to write errors to + llvm::raw_ostream &ErrorStream; + + /// The architecture/syntax pair for the current architecture + CPUKey Key; + /// The LLVM target corresponding to the disassembler + const llvm::Target *Tgt; + /// The target machine instance. + llvm::OwningPtr TargetMachine; + /// The assembly information for the target architecture + llvm::OwningPtr AsmInfo; + /// The disassembler for the target architecture + llvm::OwningPtr Disassembler; + /// The output string for the instruction printer; must be guarded with + /// PrinterMutex + llvm::OwningPtr InstString; + /// The output stream for the disassembler; must be guarded with + /// PrinterMutex + llvm::OwningPtr InstStream; + /// The instruction printer for the target architecture; must be guarded with + /// PrinterMutex when printing + llvm::OwningPtr InstPrinter; + /// The mutex that guards the instruction printer's printing functions, which + /// use a shared stream + llvm::sys::Mutex PrinterMutex; + /// The array of instruction information provided by the TableGen backend for + /// the target architecture + const llvm::EDInstInfo *InstInfos; + /// The target-specific lexer for use in tokenizing strings, in + /// target-independent and target-specific portions + llvm::OwningPtr GenericAsmLexer; + llvm::OwningPtr SpecificAsmLexer; + /// The guard for the above + llvm::sys::Mutex ParserMutex; + /// The LLVM number used for the target disassembly syntax variant + int LLVMSyntaxVariant; + + typedef std::vector regvec_t; + typedef std::map regrmap_t; + + /// A vector of registers for quick mapping from LLVM register IDs to names + regvec_t RegVec; + /// A map of registers for quick mapping from register names to LLVM IDs + regrmap_t RegRMap; + + /// A set of register IDs for aliases of the stack pointer for the current + /// architecture + std::set stackPointers; + /// A set of register IDs for aliases of the program counter for the current + /// architecture + std::set programCounters; + + /// Constructor - initializes a disassembler with all the necessary objects, + /// which come pre-allocated from the registry accessor function + /// + /// @arg key - the architecture and disassembly syntax for the + /// disassembler + EDDisassembler(CPUKey& key); + + /// valid - reports whether there was a failure in the constructor. + bool valid() { + return Valid; + } + + /// hasSemantics - reports whether the disassembler can provide operands and + /// tokens. + bool hasSemantics() { + return HasSemantics; + } + + ~EDDisassembler(); + + /// createInst - creates and returns an instruction given a callback and + /// memory address, or NULL on failure + /// + /// @arg byteReader - A callback function that provides machine code bytes + /// @arg address - The address of the first byte of the instruction, + /// suitable for passing to byteReader + /// @arg arg - An opaque argument for byteReader + EDInst *createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg); + + /// initMaps - initializes regVec and regRMap using the provided register + /// info + /// + /// @arg registerInfo - the register information to use as a source + void initMaps(const llvm::TargetRegisterInfo ®isterInfo); + /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a + /// register for a given register ID, or NULL on failure + /// + /// @arg registerID - the ID of the register to be queried + const char *nameWithRegisterID(unsigned registerID) const; + /// registerIDWithName - Returns the ID of a register for a given register + /// name, or (unsigned)-1 on failure + /// + /// @arg name - The name of the register + unsigned registerIDWithName(const char *name) const; + + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsStackPointer(unsigned registerID); + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsProgramCounter(unsigned registerID); + + /// printInst - prints an MCInst to a string, returning 0 on success, or -1 + /// otherwise + /// + /// @arg str - A reference to a string which is filled in with the string + /// representation of the instruction + /// @arg inst - A reference to the MCInst to be printed + int printInst(std::string& str, + llvm::MCInst& inst); + + /// parseInst - extracts operands and tokens from a string for use in + /// tokenizing the string. Returns 0 on success, or -1 otherwise. + /// + /// @arg operands - A reference to a vector that will be filled in with the + /// parsed operands + /// @arg tokens - A reference to a vector that will be filled in with the + /// tokens + /// @arg str - The string representation of the instruction + int parseInst(llvm::SmallVectorImpl &operands, + llvm::SmallVectorImpl &tokens, + const std::string &str); + + /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler + int llvmSyntaxVariant() const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp new file mode 100644 index 00000000000..e22408f060b --- /dev/null +++ b/lib/MC/MCDisassembler/EDInst.cpp @@ -0,0 +1,207 @@ +//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's instruction class. +// The instruction is responsible for vending the string representation, +// individual tokens, and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#include "EDInst.h" +#include "EDDisassembler.h" +#include "EDOperand.h" +#include "EDToken.h" + +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +EDInst::EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const llvm::EDInstInfo *info) : + Disassembler(disassembler), + Inst(inst), + ThisInstInfo(info), + ByteSize(byteSize), + BranchTarget(-1), + MoveSource(-1), + MoveTarget(-1) { + OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()]; +} + +EDInst::~EDInst() { + unsigned int index; + unsigned int numOperands = Operands.size(); + + for (index = 0; index < numOperands; ++index) + delete Operands[index]; + + unsigned int numTokens = Tokens.size(); + + for (index = 0; index < numTokens; ++index) + delete Tokens[index]; + + delete Inst; +} + +uint64_t EDInst::byteSize() { + return ByteSize; +} + +int EDInst::stringify() { + if (StringifyResult.valid()) + return StringifyResult.result(); + + if (Disassembler.printInst(String, *Inst)) + return StringifyResult.setResult(-1); + + return StringifyResult.setResult(0); +} + +int EDInst::getString(const char*& str) { + if (stringify()) + return -1; + + str = String.c_str(); + + return 0; +} + +unsigned EDInst::instID() { + return Inst->getOpcode(); +} + +bool EDInst::isBranch() { + if (ThisInstInfo) + return + ThisInstInfo->instructionType == kInstructionTypeBranch || + ThisInstInfo->instructionType == kInstructionTypeCall; + else + return false; +} + +bool EDInst::isMove() { + if (ThisInstInfo) + return ThisInstInfo->instructionType == kInstructionTypeMove; + else + return false; +} + +int EDInst::parseOperands() { + if (ParseResult.valid()) + return ParseResult.result(); + + if (!ThisInstInfo) + return ParseResult.setResult(-1); + + unsigned int opIndex; + unsigned int mcOpIndex = 0; + + for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) { + if (isBranch() && + (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) { + BranchTarget = opIndex; + } + else if (isMove()) { + if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource) + MoveSource = opIndex; + else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget) + MoveTarget = opIndex; + } + + EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex); + + Operands.push_back(operand); + } + + return ParseResult.setResult(0); +} + +int EDInst::branchTargetID() { + if (parseOperands()) + return -1; + return BranchTarget; +} + +int EDInst::moveSourceID() { + if (parseOperands()) + return -1; + return MoveSource; +} + +int EDInst::moveTargetID() { + if (parseOperands()) + return -1; + return MoveTarget; +} + +int EDInst::numOperands() { + if (parseOperands()) + return -1; + return Operands.size(); +} + +int EDInst::getOperand(EDOperand *&operand, unsigned int index) { + if (parseOperands()) + return -1; + + if (index >= Operands.size()) + return -1; + + operand = Operands[index]; + return 0; +} + +int EDInst::tokenize() { + if (TokenizeResult.valid()) + return TokenizeResult.result(); + + if (stringify()) + return TokenizeResult.setResult(-1); + + return TokenizeResult.setResult(EDToken::tokenize(Tokens, + String, + OperandOrder, + Disassembler)); + +} + +int EDInst::numTokens() { + if (tokenize()) + return -1; + return Tokens.size(); +} + +int EDInst::getToken(EDToken *&token, unsigned int index) { + if (tokenize()) + return -1; + token = Tokens[index]; + return 0; +} + +#ifdef __BLOCKS__ +int EDInst::visitTokens(EDTokenVisitor_t visitor) { + if (tokenize()) + return -1; + + tokvec_t::iterator iter; + + for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) { + int ret = visitor(*iter); + if (ret == 1) + return 0; + if (ret != 0) + return -1; + } + + return 0; +} +#endif diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h new file mode 100644 index 00000000000..0a98fea2c11 --- /dev/null +++ b/lib/MC/MCDisassembler/EDInst.h @@ -0,0 +1,181 @@ +//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// instruction class. The instruction is responsible for vending the string +// representation, individual tokens and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDINST_H +#define LLVM_EDINST_H + +#include "llvm/ADT/SmallVector.h" +#include +#include + +namespace llvm { + class MCInst; + struct EDInstInfo; + struct EDToken; + struct EDDisassembler; + struct EDOperand; + +#ifdef __BLOCKS__ + typedef int (^EDTokenVisitor_t)(EDToken *token); +#endif + +/// CachedResult - Encapsulates the result of a function along with the validity +/// of that result, so that slow functions don't need to run twice +struct CachedResult { + /// True if the result has been obtained by executing the function + bool Valid; + /// The result last obtained from the function + int Result; + + /// Constructor - Initializes an invalid result + CachedResult() : Valid(false) { } + /// valid - Returns true if the result has been obtained by executing the + /// function and false otherwise + bool valid() { return Valid; } + /// result - Returns the result of the function or an undefined value if + /// valid() is false + int result() { return Result; } + /// setResult - Sets the result of the function and declares it valid + /// returning the result (so that setResult() can be called from inside a + /// return statement) + /// @arg result - The result of the function + int setResult(int result) { Result = result; Valid = true; return result; } +}; + +/// EDInst - Encapsulates a single instruction, which can be queried for its +/// string representation, as well as its operands and tokens +struct EDInst { + /// The parent disassembler + EDDisassembler &Disassembler; + /// The containing MCInst + llvm::MCInst *Inst; + /// The instruction information provided by TableGen for this instruction + const llvm::EDInstInfo *ThisInstInfo; + /// The number of bytes for the machine code representation of the instruction + uint64_t ByteSize; + + /// The result of the stringify() function + CachedResult StringifyResult; + /// The string representation of the instruction + std::string String; + /// The order in which operands from the InstInfo's operand information appear + /// in String + const char* OperandOrder; + + /// The result of the parseOperands() function + CachedResult ParseResult; + typedef llvm::SmallVector opvec_t; + /// The instruction's operands + opvec_t Operands; + /// The operand corresponding to the target, if the instruction is a branch + int BranchTarget; + /// The operand corresponding to the source, if the instruction is a move + int MoveSource; + /// The operand corresponding to the target, if the instruction is a move + int MoveTarget; + + /// The result of the tokenize() function + CachedResult TokenizeResult; + typedef std::vector tokvec_t; + /// The instruction's tokens + tokvec_t Tokens; + + /// Constructor - initializes an instruction given the output of the LLVM + /// C++ disassembler + /// + /// @arg inst - The MCInst, which will now be owned by this object + /// @arg byteSize - The size of the consumed instruction, in bytes + /// @arg disassembler - The parent disassembler + /// @arg instInfo - The instruction information produced by the table + /// generator for this instruction + EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const llvm::EDInstInfo *instInfo); + ~EDInst(); + + /// byteSize - returns the number of bytes consumed by the machine code + /// representation of the instruction + uint64_t byteSize(); + /// instID - returns the LLVM instruction ID of the instruction + unsigned instID(); + + /// stringify - populates the String and AsmString members of the instruction, + /// returning 0 on success or -1 otherwise + int stringify(); + /// getString - retrieves a pointer to the string representation of the + /// instructinon, returning 0 on success or -1 otherwise + /// + /// @arg str - A reference to a pointer that, on success, is set to point to + /// the string representation of the instruction; this string is still owned + /// by the instruction and will be deleted when it is + int getString(const char *&str); + + /// isBranch - Returns true if the instruction is a branch + bool isBranch(); + /// isMove - Returns true if the instruction is a move + bool isMove(); + + /// parseOperands - populates the Operands member of the instruction, + /// returning 0 on success or -1 otherwise + int parseOperands(); + /// branchTargetID - returns the ID (suitable for use with getOperand()) of + /// the target operand if the instruction is a branch, or -1 otherwise + int branchTargetID(); + /// moveSourceID - returns the ID of the source operand if the instruction + /// is a move, or -1 otherwise + int moveSourceID(); + /// moveTargetID - returns the ID of the target operand if the instruction + /// is a move, or -1 otherwise + int moveTargetID(); + + /// numOperands - returns the number of operands available to retrieve, or -1 + /// on error + int numOperands(); + /// getOperand - retrieves an operand from the instruction's operand list by + /// index, returning 0 on success or -1 on error + /// + /// @arg operand - A reference whose target is pointed at the operand on + /// success, although the operand is still owned by the EDInst + /// @arg index - The index of the operand in the instruction + int getOperand(EDOperand *&operand, unsigned int index); + + /// tokenize - populates the Tokens member of the instruction, returning 0 on + /// success or -1 otherwise + int tokenize(); + /// numTokens - returns the number of tokens in the instruction, or -1 on + /// error + int numTokens(); + /// getToken - retrieves a token from the instruction's token list by index, + /// returning 0 on success or -1 on error + /// + /// @arg token - A reference whose target is pointed at the token on success, + /// although the token is still owned by the EDInst + /// @arg index - The index of the token in the instrcutino + int getToken(EDToken *&token, unsigned int index); + +#ifdef __BLOCKS__ + /// visitTokens - Visits each token in turn and applies a block to it, + /// returning 0 if all blocks are visited and/or the block signals + /// termination by returning 1; returns -1 on error + /// + /// @arg visitor - The visitor block to apply to all tokens. + int visitTokens(EDTokenVisitor_t visitor); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp new file mode 100644 index 00000000000..2aed123368d --- /dev/null +++ b/lib/MC/MCDisassembler/EDOperand.cpp @@ -0,0 +1,282 @@ +//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's operand class. The +// operand is responsible for allowing evaluation given a particular register +// context. +// +//===----------------------------------------------------------------------===// + +#include "EDOperand.h" +#include "EDDisassembler.h" +#include "EDInst.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" +using namespace llvm; + +EDOperand::EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex) : + Disassembler(disassembler), + Inst(inst), + OpIndex(opIndex), + MCOpIndex(mcOpIndex) { + unsigned int numMCOperands = 0; + + if (Disassembler.Key.Arch == Triple::x86 || + Disassembler.Key.Arch == Triple::x86_64) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + break; + case kOperandTypeImmediate: + numMCOperands = 1; + break; + case kOperandTypeRegister: + numMCOperands = 1; + break; + case kOperandTypeX86Memory: + numMCOperands = 5; + break; + case kOperandTypeX86EffectiveAddress: + numMCOperands = 4; + break; + case kOperandTypeX86PCRelative: + numMCOperands = 1; + break; + } + } + else if (Disassembler.Key.Arch == Triple::arm || + Disassembler.Key.Arch == Triple::thumb) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + case kOperandTypeARMRegisterList: + break; + case kOperandTypeImmediate: + case kOperandTypeRegister: + case kOperandTypeARMBranchTarget: + case kOperandTypeARMSoImm: + case kOperandTypeThumb2SoImm: + case kOperandTypeARMSoImm2Part: + case kOperandTypeARMPredicate: + case kOperandTypeThumbITMask: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeARMTBAddrMode: + case kOperandTypeThumb2AddrModeImm8s4Offset: + numMCOperands = 1; + break; + case kOperandTypeThumb2SoReg: + case kOperandTypeARMAddrMode2Offset: + case kOperandTypeARMAddrMode3Offset: + case kOperandTypeARMAddrMode4: + case kOperandTypeARMAddrMode5: + case kOperandTypeARMAddrModePC: + case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeImm8s4: + case kOperandTypeThumbAddrModeRR: + case kOperandTypeThumbAddrModeSP: + numMCOperands = 2; + break; + case kOperandTypeARMSoReg: + case kOperandTypeARMAddrMode2: + case kOperandTypeARMAddrMode3: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumbAddrModeS1: + case kOperandTypeThumbAddrModeS2: + case kOperandTypeThumbAddrModeS4: + case kOperandTypeARMAddrMode6Offset: + numMCOperands = 3; + break; + case kOperandTypeARMAddrMode6: + numMCOperands = 4; + break; + } + } + + mcOpIndex += numMCOperands; +} + +EDOperand::~EDOperand() { +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg) { + uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; + + switch (Disassembler.Key.Arch) { + default: + return -1; + case Triple::x86: + case Triple::x86_64: + switch (operandType) { + default: + return -1; + case kOperandTypeImmediate: + result = Inst.Inst->getOperand(MCOpIndex).getImm(); + return 0; + case kOperandTypeRegister: + { + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); + return callback(&result, reg, arg); + } + case kOperandTypeX86PCRelative: + { + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); + + uint64_t ripVal; + + // TODO fix how we do this + + if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg)) + return -1; + + result = ripVal + displacement; + return 0; + } + case kOperandTypeX86Memory: + case kOperandTypeX86EffectiveAddress: + { + unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg(); + uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm(); + unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg(); + int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm(); + //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); + + uint64_t addr = 0; + + if (baseReg) { + uint64_t baseVal; + if (callback(&baseVal, baseReg, arg)) + return -1; + addr += baseVal; + } + + if (indexReg) { + uint64_t indexVal; + if (callback(&indexVal, indexReg, arg)) + return -1; + addr += (scaleAmount * indexVal); + } + + addr += displacement; + + result = addr; + return 0; + } + } + break; + case Triple::arm: + case Triple::thumb: + switch (operandType) { + default: + return -1; + case kOperandTypeImmediate: + result = Inst.Inst->getOperand(MCOpIndex).getImm(); + return 0; + case kOperandTypeRegister: + { + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); + return callback(&result, reg, arg); + } + case kOperandTypeARMBranchTarget: + { + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); + + uint64_t pcVal; + + if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg)) + return -1; + + result = pcVal + displacement; + return 0; + } + } + } + + return -1; +} + +int EDOperand::isRegister() { + return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeRegister); +} + +unsigned EDOperand::regVal() { + return Inst.Inst->getOperand(MCOpIndex).getReg(); +} + +int EDOperand::isImmediate() { + return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeImmediate); +} + +uint64_t EDOperand::immediateVal() { + return Inst.Inst->getOperand(MCOpIndex).getImm(); +} + +int EDOperand::isMemory() { + uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; + + switch (operandType) { + default: + return 0; + case kOperandTypeX86Memory: + case kOperandTypeX86PCRelative: + case kOperandTypeX86EffectiveAddress: + case kOperandTypeARMSoReg: + case kOperandTypeARMSoImm: + case kOperandTypeARMAddrMode2: + case kOperandTypeARMAddrMode2Offset: + case kOperandTypeARMAddrMode3: + case kOperandTypeARMAddrMode3Offset: + case kOperandTypeARMAddrMode4: + case kOperandTypeARMAddrMode5: + case kOperandTypeARMAddrMode6: + case kOperandTypeARMAddrModePC: + case kOperandTypeARMBranchTarget: + case kOperandTypeThumbAddrModeS1: + case kOperandTypeThumbAddrModeS2: + case kOperandTypeThumbAddrModeS4: + case kOperandTypeThumbAddrModeRR: + case kOperandTypeThumbAddrModeSP: + case kOperandTypeThumb2SoImm: + case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumb2AddrModeImm8s4: + return 1; + } +} + +#ifdef __BLOCKS__ +struct RegisterReaderWrapper { + EDOperand::EDRegisterBlock_t regBlock; +}; + +int readerWrapperCallback(uint64_t *value, + unsigned regID, + void *arg) { + struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg; + return wrapper->regBlock(value, regID); +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterBlock_t regBlock) { + struct RegisterReaderWrapper wrapper; + wrapper.regBlock = regBlock; + return evaluate(result, + readerWrapperCallback, + (void*)&wrapper); +} +#endif diff --git a/lib/MC/MCDisassembler/EDOperand.h b/lib/MC/MCDisassembler/EDOperand.h new file mode 100644 index 00000000000..50260ec965a --- /dev/null +++ b/lib/MC/MCDisassembler/EDOperand.h @@ -0,0 +1,91 @@ +//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// operand class. The operand is responsible for allowing evaluation given a +// particular register context. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDOPERAND_H +#define LLVM_EDOPERAND_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +struct EDDisassembler; +struct EDInst; + +typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, + void* arg); + + +/// EDOperand - Encapsulates a single operand, which can be evaluated by the +/// client +struct EDOperand { + /// The parent disassembler + const EDDisassembler &Disassembler; + /// The parent instruction + const EDInst &Inst; + + /// The index of the operand in the EDInst + unsigned int OpIndex; + /// The index of the first component of the operand in the MCInst + unsigned int MCOpIndex; + + /// Constructor - Initializes an EDOperand + /// + /// @arg disassembler - The disassembler responsible for the operand + /// @arg inst - The instruction containing this operand + /// @arg opIndex - The index of the operand in inst + /// @arg mcOpIndex - The index of the operand in the original MCInst + EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex); + ~EDOperand(); + + /// evaluate - Returns the numeric value of an operand to the extent possible, + /// returning 0 on success or -1 if there was some problem (such as a + /// register not being readable) + /// + /// @arg result - A reference whose target is filled in with the value of + /// the operand (the address if it is a memory operand) + /// @arg callback - A function to call to obtain register values + /// @arg arg - An opaque argument to pass to callback + int evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg); + + /// isRegister - Returns 1 if the operand is a register or 0 otherwise + int isRegister(); + /// regVal - Returns the register value. + unsigned regVal(); + + /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise + int isImmediate(); + /// immediateVal - Returns the immediate value. + uint64_t immediateVal(); + + /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise + int isMemory(); + +#ifdef __BLOCKS__ + typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); + + /// evaluate - Like evaluate for a callback, but uses a block instead + int evaluate(uint64_t &result, + EDRegisterBlock_t regBlock); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp new file mode 100644 index 00000000000..400e1649e97 --- /dev/null +++ b/lib/MC/MCDisassembler/EDToken.cpp @@ -0,0 +1,206 @@ +//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembler library's token class. The +// token is responsible for vending information about the token, such as its +// type and logical value. +// +//===----------------------------------------------------------------------===// + +#include "EDToken.h" +#include "EDDisassembler.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +EDToken::EDToken(StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler) : + Disassembler(disassembler), + Str(str), + Type(type), + LocalType(localType), + OperandID(-1) { +} + +EDToken::~EDToken() { +} + +void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) { + Type = kTokenLiteral; + LiteralSign = sign; + LiteralAbsoluteValue = absoluteValue; +} + +void EDToken::makeRegister(unsigned registerID) { + Type = kTokenRegister; + RegisterID = registerID; +} + +void EDToken::setOperandID(int operandID) { + OperandID = operandID; +} + +enum EDToken::tokenType EDToken::type() const { + return Type; +} + +uint64_t EDToken::localType() const { + return LocalType; +} + +StringRef EDToken::string() const { + return Str; +} + +int EDToken::operandID() const { + return OperandID; +} + +int EDToken::literalSign() const { + if (Type != kTokenLiteral) + return -1; + return (LiteralSign ? 1 : 0); +} + +int EDToken::literalAbsoluteValue(uint64_t &value) const { + if (Type != kTokenLiteral) + return -1; + value = LiteralAbsoluteValue; + return 0; +} + +int EDToken::registerID(unsigned ®isterID) const { + if (Type != kTokenRegister) + return -1; + registerID = RegisterID; + return 0; +} + +int EDToken::tokenize(std::vector &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler) { + SmallVector parsedOperands; + SmallVector asmTokens; + + if (disassembler.parseInst(parsedOperands, asmTokens, str)) + return -1; + + SmallVectorImpl::iterator operandIterator; + unsigned int operandIndex; + SmallVectorImpl::iterator tokenIterator; + + operandIterator = parsedOperands.begin(); + operandIndex = 0; + + bool readOpcode = false; + + const char *wsPointer = asmTokens.begin()->getLoc().getPointer(); + + for (tokenIterator = asmTokens.begin(); + tokenIterator != asmTokens.end(); + ++tokenIterator) { + SMLoc tokenLoc = tokenIterator->getLoc(); + + const char *tokenPointer = tokenLoc.getPointer(); + + if (tokenPointer > wsPointer) { + unsigned long wsLength = tokenPointer - wsPointer; + + EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength), + EDToken::kTokenWhitespace, + 0, + disassembler); + + tokens.push_back(whitespaceToken); + } + + wsPointer = tokenPointer + tokenIterator->getString().size(); + + while (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() > + (*operandIterator)->getEndLoc().getPointer()) { + ++operandIterator; + ++operandIndex; + } + + EDToken *token; + + switch (tokenIterator->getKind()) { + case AsmToken::Identifier: + if (!readOpcode) { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenOpcode, + (uint64_t)tokenIterator->getKind(), + disassembler); + readOpcode = true; + break; + } + // any identifier that isn't an opcode is mere punctuation; so we fall + // through + default: + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenPunctuation, + (uint64_t)tokenIterator->getKind(), + disassembler); + break; + case AsmToken::Integer: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + int64_t intVal = tokenIterator->getIntVal(); + + if (intVal < 0) + token->makeLiteral(true, -intVal); + else + token->makeLiteral(false, intVal); + break; + } + case AsmToken::Register: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + token->makeRegister((unsigned)tokenIterator->getRegVal()); + break; + } + } + + if (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() >= + (*operandIterator)->getStartLoc().getPointer()) { + /// operandIndex == 0 means the operand is the instruction (which the + /// AsmParser treats as an operand but edis does not). We therefore skip + /// operandIndex == 0 and subtract 1 from all other operand indices. + + if (operandIndex > 0) + token->setOperandID(operandOrder[operandIndex - 1]); + } + + tokens.push_back(token); + } + + return 0; +} + +int EDToken::getString(const char*& buf) { + if (PermStr.length() == 0) { + PermStr = Str.str(); + } + buf = PermStr.c_str(); + return 0; +} diff --git a/lib/MC/MCDisassembler/EDToken.h b/lib/MC/MCDisassembler/EDToken.h new file mode 100644 index 00000000000..ab596f5106a --- /dev/null +++ b/lib/MC/MCDisassembler/EDToken.h @@ -0,0 +1,138 @@ +//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's token +// class. The token is responsible for vending information about the token, +// such as its type and logical value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDTOKEN_H +#define LLVM_EDTOKEN_H + +#include "llvm/ADT/StringRef.h" +#include +#include + +namespace llvm { + +struct EDDisassembler; + +/// EDToken - Encapsulates a single token, which can provide a string +/// representation of itself or interpret itself in various ways, depending +/// on the token type. +struct EDToken { + enum tokenType { + kTokenWhitespace, + kTokenOpcode, + kTokenLiteral, + kTokenRegister, + kTokenPunctuation + }; + + /// The parent disassembler + EDDisassembler &Disassembler; + + /// The token's string representation + llvm::StringRef Str; + /// The token's string representation, but in a form suitable for export + std::string PermStr; + /// The type of the token, as exposed through the external API + enum tokenType Type; + /// The type of the token, as recorded by the syntax-specific tokenizer + uint64_t LocalType; + /// The operand corresponding to the token, or (unsigned int)-1 if not + /// part of an operand. + int OperandID; + + /// The sign if the token is a literal (1 if negative, 0 otherwise) + bool LiteralSign; + /// The absolute value if the token is a literal + uint64_t LiteralAbsoluteValue; + /// The LLVM register ID if the token is a register name + unsigned RegisterID; + + /// Constructor - Initializes an EDToken with the information common to all + /// tokens + /// + /// @arg str - The string corresponding to the token + /// @arg type - The token's type as exposed through the public API + /// @arg localType - The token's type as recorded by the tokenizer + /// @arg disassembler - The disassembler responsible for the token + EDToken(llvm::StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler); + + /// makeLiteral - Adds the information specific to a literal + /// @arg sign - The sign of the literal (1 if negative, 0 + /// otherwise) + /// + /// @arg absoluteValue - The absolute value of the literal + void makeLiteral(bool sign, uint64_t absoluteValue); + /// makeRegister - Adds the information specific to a register + /// + /// @arg registerID - The LLVM register ID + void makeRegister(unsigned registerID); + + /// setOperandID - Links the token to a numbered operand + /// + /// @arg operandID - The operand ID to link to + void setOperandID(int operandID); + + ~EDToken(); + + /// type - Returns the public type of the token + enum tokenType type() const; + /// localType - Returns the tokenizer-specific type of the token + uint64_t localType() const; + /// string - Returns the string representation of the token + llvm::StringRef string() const; + /// operandID - Returns the operand ID of the token + int operandID() const; + + /// literalSign - Returns the sign of the token + /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal) + int literalSign() const; + /// literalAbsoluteValue - Retrieves the absolute value of the token, and + /// returns -1 if the token is not a literal + /// @arg value - A reference to a value that is filled in with the absolute + /// value, if it is valid + int literalAbsoluteValue(uint64_t &value) const; + /// registerID - Retrieves the register ID of the token, and returns -1 if the + /// token is not a register + /// + /// @arg registerID - A reference to a value that is filled in with the + /// register ID, if it is valid + int registerID(unsigned ®isterID) const; + + /// tokenize - Tokenizes a string using the platform- and syntax-specific + /// tokenizer, and returns 0 on success (-1 on failure) + /// + /// @arg tokens - A vector that will be filled in with pointers to + /// allocated tokens + /// @arg str - The string, as outputted by the AsmPrinter + /// @arg operandOrder - The order of the operands from the operandFlags array + /// as they appear in str + /// @arg disassembler - The disassembler for the desired target and + // assembly syntax + static int tokenize(std::vector &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler); + + /// getString - Directs a character pointer to the string, returning 0 on + /// success (-1 on failure) + /// @arg buf - A reference to a pointer that is set to point to the string. + /// The string is still owned by the token. + int getString(const char*& buf); +}; + +} // end namespace llvm +#endif diff --git a/lib/MC/Makefile b/lib/MC/Makefile index a661fa6f408..bf8b7c0e783 100644 --- a/lib/MC/Makefile +++ b/lib/MC/Makefile @@ -10,7 +10,7 @@ LEVEL = ../.. LIBRARYNAME = LLVMMC BUILD_ARCHIVE := 1 -PARALLEL_DIRS := MCParser +PARALLEL_DIRS := MCParser MCDisassembler include $(LEVEL)/Makefile.common diff --git a/tools/Makefile b/tools/Makefile index 9bc74fe3fa4..7f0f1df7146 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -15,14 +15,11 @@ OPTIONAL_PARALLEL_DIRS := clang # NOTE: The tools are organized into five groups of four consisting of one # large and three small executables. This is done to minimize memory load # in parallel builds. Please retain this ordering. - -# libEnhancedDisassembly must be built ahead of llvm-mc -# because llvm-mc links against libEnhancedDisassembly -DIRS := llvm-config edis llvm-mc -PARALLEL_DIRS := opt llvm-as llvm-dis \ +DIRS := llvm-config +PARALLEL_DIRS := opt llvm-as llvm-dis edis \ llc llvm-ranlib llvm-ar llvm-nm \ llvm-ld llvm-prof llvm-link \ - lli llvm-extract \ + lli llvm-extract llvm-mc \ bugpoint llvm-bcanalyzer llvm-stub \ llvmc @@ -34,6 +31,7 @@ endif include $(LEVEL)/Makefile.config + # These libraries build as dynamic libraries (.dylib /.so), they can only be # built if ENABLE_PIC is set. ifeq ($(ENABLE_PIC),1) @@ -46,6 +44,14 @@ ifeq ($(ENABLE_PIC),1) else PARALLEL_DIRS += lto endif + + # The edis library is only supported if ARM and/or X86 are enabled, and if + # LLVM is being built PIC on platforms that support dylibs. + ifneq ($(DISABLE_EDIS),1) + ifneq ($(filter $(TARGETS_TO_BUILD), X86 ARM),) + PARALLEL_DIRS := $(filter-out edis, $(PARALLEL_DIRS)) + endif + endif endif endif diff --git a/tools/edis/EDDisassembler.cpp b/tools/edis/EDDisassembler.cpp deleted file mode 100644 index dc2ed3e2c2a..00000000000 --- a/tools/edis/EDDisassembler.cpp +++ /dev/null @@ -1,408 +0,0 @@ -//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Enhanced Disassembly library's disassembler class. -// The disassembler is responsible for vending individual instructions according -// to a given architecture and disassembly syntax. -// -//===----------------------------------------------------------------------===// - -#include "EDDisassembler.h" -#include "EDInst.h" - -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/EDInstInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCParser/AsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetAsmParser.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSelect.h" - -using namespace llvm; - -bool EDDisassembler::sInitialized = false; -EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; - -struct TripleMap { - Triple::ArchType Arch; - const char *String; -}; - -static struct TripleMap triplemap[] = { - { Triple::x86, "i386-unknown-unknown" }, - { Triple::x86_64, "x86_64-unknown-unknown" }, - { Triple::arm, "arm-unknown-unknown" }, - { Triple::thumb, "thumb-unknown-unknown" }, - { Triple::InvalidArch, NULL, } -}; - -/// infoFromArch - Returns the TripleMap corresponding to a given architecture, -/// or NULL if there is an error -/// -/// @arg arch - The Triple::ArchType for the desired architecture -static const char *tripleFromArch(Triple::ArchType arch) { - unsigned int infoIndex; - - for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) { - if (arch == triplemap[infoIndex].Arch) - return triplemap[infoIndex].String; - } - - return NULL; -} - -/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer -/// for the desired assembly syntax, suitable for passing to -/// Target::createMCInstPrinter() -/// -/// @arg arch - The target architecture -/// @arg syntax - The assembly syntax in sd form -static int getLLVMSyntaxVariant(Triple::ArchType arch, - EDAssemblySyntax_t syntax) { - switch (syntax) { - default: - return -1; - // Mappings below from X86AsmPrinter.cpp - case kEDAssemblySyntaxX86ATT: - if (arch == Triple::x86 || arch == Triple::x86_64) - return 0; - else - return -1; - case kEDAssemblySyntaxX86Intel: - if (arch == Triple::x86 || arch == Triple::x86_64) - return 1; - else - return -1; - case kEDAssemblySyntaxARMUAL: - if (arch == Triple::arm || arch == Triple::thumb) - return 0; - else - return -1; - } -} - -void EDDisassembler::initialize() { - if (sInitialized) - return; - - sInitialized = true; - - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllAsmPrinters(); - InitializeAllAsmParsers(); - InitializeAllDisassemblers(); -} - -#undef BRINGUP_TARGET - -EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, - EDAssemblySyntax_t syntax) { - CPUKey key; - key.Arch = arch; - key.Syntax = syntax; - - EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); - - if (i != sDisassemblers.end()) { - return i->second; - } else { - EDDisassembler* sdd = new EDDisassembler(key); - if (!sdd->valid()) { - delete sdd; - return NULL; - } - - sDisassemblers[key] = sdd; - - return sdd; - } - - return NULL; -} - -EDDisassembler *EDDisassembler::getDisassembler(StringRef str, - EDAssemblySyntax_t syntax) { - Triple triple(str); - - return getDisassembler(triple.getArch(), syntax); -} - -EDDisassembler::EDDisassembler(CPUKey &key) : - Valid(false), - HasSemantics(false), - ErrorStream(nulls()), - Key(key) { - const char *triple = tripleFromArch(key.Arch); - - if (!triple) - return; - - LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); - - if (LLVMSyntaxVariant < 0) - return; - - std::string tripleString(triple); - std::string errorString; - - Tgt = TargetRegistry::lookupTarget(tripleString, - errorString); - - if (!Tgt) - return; - - std::string featureString; - - TargetMachine.reset(Tgt->createTargetMachine(tripleString, - featureString)); - - const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); - - if (!registerInfo) - return; - - initMaps(*registerInfo); - - AsmInfo.reset(Tgt->createAsmInfo(tripleString)); - - if (!AsmInfo) - return; - - Disassembler.reset(Tgt->createMCDisassembler()); - - if (!Disassembler) - return; - - InstInfos = Disassembler->getEDInfo(); - - InstString.reset(new std::string); - InstStream.reset(new raw_string_ostream(*InstString)); - InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); - - if (!InstPrinter) - return; - - GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); - SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); - SpecificAsmLexer->InstallLexer(*GenericAsmLexer); - - initMaps(*TargetMachine->getRegisterInfo()); - - Valid = true; -} - -EDDisassembler::~EDDisassembler() { - if (!valid()) - return; -} - -namespace { - /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback - /// as provided by the sd interface. See MemoryObject. - class EDMemoryObject : public llvm::MemoryObject { - private: - EDByteReaderCallback Callback; - void *Arg; - public: - EDMemoryObject(EDByteReaderCallback callback, - void *arg) : Callback(callback), Arg(arg) { } - ~EDMemoryObject() { } - uint64_t getBase() const { return 0x0; } - uint64_t getExtent() const { return (uint64_t)-1; } - int readByte(uint64_t address, uint8_t *ptr) const { - if (!Callback) - return -1; - - if (Callback(ptr, address, Arg)) - return -1; - - return 0; - } - }; -} - -EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, - uint64_t address, - void *arg) { - EDMemoryObject memoryObject(byteReader, arg); - - MCInst* inst = new MCInst; - uint64_t byteSize; - - if (!Disassembler->getInstruction(*inst, - byteSize, - memoryObject, - address, - ErrorStream)) { - delete inst; - return NULL; - } else { - const llvm::EDInstInfo *thisInstInfo; - - thisInstInfo = &InstInfos[inst->getOpcode()]; - - EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); - return sdInst; - } -} - -void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { - unsigned numRegisters = registerInfo.getNumRegs(); - unsigned registerIndex; - - for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { - const char* registerName = registerInfo.get(registerIndex).Name; - - RegVec.push_back(registerName); - RegRMap[registerName] = registerIndex; - } - - switch (Key.Arch) { - default: - break; - case Triple::x86: - case Triple::x86_64: - stackPointers.insert(registerIDWithName("SP")); - stackPointers.insert(registerIDWithName("ESP")); - stackPointers.insert(registerIDWithName("RSP")); - - programCounters.insert(registerIDWithName("IP")); - programCounters.insert(registerIDWithName("EIP")); - programCounters.insert(registerIDWithName("RIP")); - break; - case Triple::arm: - case Triple::thumb: - stackPointers.insert(registerIDWithName("SP")); - - programCounters.insert(registerIDWithName("PC")); - break; - } -} - -const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { - if (registerID >= RegVec.size()) - return NULL; - else - return RegVec[registerID].c_str(); -} - -unsigned EDDisassembler::registerIDWithName(const char *name) const { - regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); - if (iter == RegRMap.end()) - return 0; - else - return (*iter).second; -} - -bool EDDisassembler::registerIsStackPointer(unsigned registerID) { - return (stackPointers.find(registerID) != stackPointers.end()); -} - -bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { - return (programCounters.find(registerID) != programCounters.end()); -} - -int EDDisassembler::printInst(std::string &str, MCInst &inst) { - PrinterMutex.acquire(); - - InstPrinter->printInst(&inst, *InstStream); - InstStream->flush(); - str = *InstString; - InstString->clear(); - - PrinterMutex.release(); - - return 0; -} - -int EDDisassembler::parseInst(SmallVectorImpl &operands, - SmallVectorImpl &tokens, - const std::string &str) { - int ret = 0; - - switch (Key.Arch) { - default: - return -1; - case Triple::x86: - case Triple::x86_64: - case Triple::arm: - case Triple::thumb: - break; - } - - const char *cStr = str.c_str(); - MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); - - StringRef instName; - SMLoc instLoc; - - SourceMgr sourceMgr; - sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over - MCContext context(*AsmInfo); - OwningPtr streamer(createNullStreamer(context)); - OwningPtr genericParser(createMCAsmParser(*Tgt, sourceMgr, - context, *streamer, - *AsmInfo)); - OwningPtr TargetParser(Tgt->createAsmParser(*genericParser, - *TargetMachine)); - - AsmToken OpcodeToken = genericParser->Lex(); - AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to - - if (OpcodeToken.is(AsmToken::Identifier)) { - instName = OpcodeToken.getString(); - instLoc = OpcodeToken.getLoc(); - - if (NextToken.isNot(AsmToken::Eof) && - TargetParser->ParseInstruction(instName, instLoc, operands)) - ret = -1; - } else { - ret = -1; - } - - ParserMutex.acquire(); - - if (!ret) { - GenericAsmLexer->setBuffer(buf); - - while (SpecificAsmLexer->Lex(), - SpecificAsmLexer->isNot(AsmToken::Eof) && - SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { - if (SpecificAsmLexer->is(AsmToken::Error)) { - ret = -1; - break; - } - tokens.push_back(SpecificAsmLexer->getTok()); - } - } - - ParserMutex.release(); - - return ret; -} - -int EDDisassembler::llvmSyntaxVariant() const { - return LLVMSyntaxVariant; -} diff --git a/tools/edis/EDDisassembler.h b/tools/edis/EDDisassembler.h deleted file mode 100644 index 913ae842bf9..00000000000 --- a/tools/edis/EDDisassembler.h +++ /dev/null @@ -1,259 +0,0 @@ -//===-EDDisassembler.h - LLVM Enhanced Disassembler -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the Enhanced Disassembly library's -// disassembler class. The disassembler is responsible for vending individual -// instructions according to a given architecture and disassembly syntax. -// -//===----------------------------------------------------------------------===// - -#ifndef EDDisassembler_ -#define EDDisassembler_ - -#include "EDInfo.inc" - -#include "llvm-c/EnhancedDisassembly.h" - -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" - -#include -#include -#include -#include - -namespace llvm { -class AsmLexer; -class AsmToken; -class MCContext; -class MCAsmInfo; -class MCAsmLexer; -class AsmParser; -class TargetAsmLexer; -class TargetAsmParser; -class MCDisassembler; -class MCInstPrinter; -class MCInst; -class MCParsedAsmOperand; -class MCStreamer; -template class SmallVectorImpl; -class SourceMgr; -class Target; -class TargetMachine; -class TargetRegisterInfo; - -struct EDInstInfo; -} - -/// EDDisassembler - Encapsulates a disassembler for a single architecture and -/// disassembly syntax. Also manages the static disassembler registry. -struct EDDisassembler { - //////////////////// - // Static members // - //////////////////// - - /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax - /// pair - struct CPUKey { - /// The architecture type - llvm::Triple::ArchType Arch; - - /// The assembly syntax - EDAssemblySyntax_t Syntax; - - /// operator== - Equality operator - bool operator==(const CPUKey &key) const { - return (Arch == key.Arch && - Syntax == key.Syntax); - } - - /// operator< - Less-than operator - bool operator<(const CPUKey &key) const { - if(Arch > key.Arch) - return false; - if(Syntax >= key.Syntax) - return false; - return true; - } - }; - - typedef std::map DisassemblerMap_t; - - /// True if the disassembler registry has been initialized; false if not - static bool sInitialized; - /// A map from disassembler specifications to disassemblers. Populated - /// lazily. - static DisassemblerMap_t sDisassemblers; - - /// getDisassembler - Returns the specified disassemble, or NULL on failure - /// - /// @arg arch - The desired architecture - /// @arg syntax - The desired disassembly syntax - static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch, - EDAssemblySyntax_t syntax); - - /// getDisassembler - Returns the disassembler for a given combination of - /// CPU type, CPU subtype, and assembly syntax, or NULL on failure - /// - /// @arg str - The string representation of the architecture triple, e.g., - /// "x86_64-apple-darwin" - /// @arg syntax - The disassembly syntax for the required disassembler - static EDDisassembler *getDisassembler(llvm::StringRef str, - EDAssemblySyntax_t syntax); - - /// initialize - Initializes the disassembler registry and the LLVM backend - static void initialize(); - - //////////////////////// - // Per-object members // - //////////////////////// - - /// True only if the object has been successfully initialized - bool Valid; - /// True if the disassembler can provide semantic information - bool HasSemantics; - - /// The stream to write errors to - llvm::raw_ostream &ErrorStream; - - /// The architecture/syntax pair for the current architecture - CPUKey Key; - /// The LLVM target corresponding to the disassembler - const llvm::Target *Tgt; - /// The target machien instance. - llvm::OwningPtr TargetMachine; - /// The assembly information for the target architecture - llvm::OwningPtr AsmInfo; - /// The disassembler for the target architecture - llvm::OwningPtr Disassembler; - /// The output string for the instruction printer; must be guarded with - /// PrinterMutex - llvm::OwningPtr InstString; - /// The output stream for the disassembler; must be guarded with - /// PrinterMutex - llvm::OwningPtr InstStream; - /// The instruction printer for the target architecture; must be guarded with - /// PrinterMutex when printing - llvm::OwningPtr InstPrinter; - /// The mutex that guards the instruction printer's printing functions, which - /// use a shared stream - llvm::sys::Mutex PrinterMutex; - /// The array of instruction information provided by the TableGen backend for - /// the target architecture - const llvm::EDInstInfo *InstInfos; - /// The target-specific lexer for use in tokenizing strings, in - /// target-independent and target-specific portions - llvm::OwningPtr GenericAsmLexer; - llvm::OwningPtr SpecificAsmLexer; - /// The guard for the above - llvm::sys::Mutex ParserMutex; - /// The LLVM number used for the target disassembly syntax variant - int LLVMSyntaxVariant; - - typedef std::vector regvec_t; - typedef std::map regrmap_t; - - /// A vector of registers for quick mapping from LLVM register IDs to names - regvec_t RegVec; - /// A map of registers for quick mapping from register names to LLVM IDs - regrmap_t RegRMap; - - /// A set of register IDs for aliases of the stack pointer for the current - /// architecture - std::set stackPointers; - /// A set of register IDs for aliases of the program counter for the current - /// architecture - std::set programCounters; - - /// Constructor - initializes a disassembler with all the necessary objects, - /// which come pre-allocated from the registry accessor function - /// - /// @arg key - the architecture and disassembly syntax for the - /// disassembler - EDDisassembler(CPUKey& key); - - /// valid - reports whether there was a failure in the constructor. - bool valid() { - return Valid; - } - - /// hasSemantics - reports whether the disassembler can provide operands and - /// tokens. - bool hasSemantics() { - return HasSemantics; - } - - ~EDDisassembler(); - - /// createInst - creates and returns an instruction given a callback and - /// memory address, or NULL on failure - /// - /// @arg byteReader - A callback function that provides machine code bytes - /// @arg address - The address of the first byte of the instruction, - /// suitable for passing to byteReader - /// @arg arg - An opaque argument for byteReader - EDInst *createInst(EDByteReaderCallback byteReader, - uint64_t address, - void *arg); - - /// initMaps - initializes regVec and regRMap using the provided register - /// info - /// - /// @arg registerInfo - the register information to use as a source - void initMaps(const llvm::TargetRegisterInfo ®isterInfo); - /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a - /// register for a given register ID, or NULL on failure - /// - /// @arg registerID - the ID of the register to be queried - const char *nameWithRegisterID(unsigned registerID) const; - /// registerIDWithName - Returns the ID of a register for a given register - /// name, or (unsigned)-1 on failure - /// - /// @arg name - The name of the register - unsigned registerIDWithName(const char *name) const; - - /// registerIsStackPointer - reports whether a register ID is an alias for the - /// stack pointer register - /// - /// @arg registerID - The LLVM register ID - bool registerIsStackPointer(unsigned registerID); - /// registerIsStackPointer - reports whether a register ID is an alias for the - /// stack pointer register - /// - /// @arg registerID - The LLVM register ID - bool registerIsProgramCounter(unsigned registerID); - - /// printInst - prints an MCInst to a string, returning 0 on success, or -1 - /// otherwise - /// - /// @arg str - A reference to a string which is filled in with the string - /// representation of the instruction - /// @arg inst - A reference to the MCInst to be printed - int printInst(std::string& str, - llvm::MCInst& inst); - - /// parseInst - extracts operands and tokens from a string for use in - /// tokenizing the string. Returns 0 on success, or -1 otherwise. - /// - /// @arg operands - A reference to a vector that will be filled in with the - /// parsed operands - /// @arg tokens - A reference to a vector that will be filled in with the - /// tokens - /// @arg str - The string representation of the instruction - int parseInst(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &tokens, - const std::string &str); - - /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler - int llvmSyntaxVariant() const; -}; - -#endif diff --git a/tools/edis/EDInst.cpp b/tools/edis/EDInst.cpp deleted file mode 100644 index c009f0f8687..00000000000 --- a/tools/edis/EDInst.cpp +++ /dev/null @@ -1,207 +0,0 @@ -//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Enhanced Disassembly library's instruction class. -// The instruction is responsible for vending the string representation, -// individual tokens, and operands for a single instruction. -// -//===----------------------------------------------------------------------===// - -#include "EDDisassembler.h" -#include "EDInst.h" -#include "EDOperand.h" -#include "EDToken.h" - -#include "llvm/MC/EDInstInfo.h" -#include "llvm/MC/MCInst.h" - -using namespace llvm; - -EDInst::EDInst(llvm::MCInst *inst, - uint64_t byteSize, - EDDisassembler &disassembler, - const llvm::EDInstInfo *info) : - Disassembler(disassembler), - Inst(inst), - ThisInstInfo(info), - ByteSize(byteSize), - BranchTarget(-1), - MoveSource(-1), - MoveTarget(-1) { - OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()]; -} - -EDInst::~EDInst() { - unsigned int index; - unsigned int numOperands = Operands.size(); - - for (index = 0; index < numOperands; ++index) - delete Operands[index]; - - unsigned int numTokens = Tokens.size(); - - for (index = 0; index < numTokens; ++index) - delete Tokens[index]; - - delete Inst; -} - -uint64_t EDInst::byteSize() { - return ByteSize; -} - -int EDInst::stringify() { - if (StringifyResult.valid()) - return StringifyResult.result(); - - if (Disassembler.printInst(String, *Inst)) - return StringifyResult.setResult(-1); - - return StringifyResult.setResult(0); -} - -int EDInst::getString(const char*& str) { - if (stringify()) - return -1; - - str = String.c_str(); - - return 0; -} - -unsigned EDInst::instID() { - return Inst->getOpcode(); -} - -bool EDInst::isBranch() { - if (ThisInstInfo) - return - ThisInstInfo->instructionType == kInstructionTypeBranch || - ThisInstInfo->instructionType == kInstructionTypeCall; - else - return false; -} - -bool EDInst::isMove() { - if (ThisInstInfo) - return ThisInstInfo->instructionType == kInstructionTypeMove; - else - return false; -} - -int EDInst::parseOperands() { - if (ParseResult.valid()) - return ParseResult.result(); - - if (!ThisInstInfo) - return ParseResult.setResult(-1); - - unsigned int opIndex; - unsigned int mcOpIndex = 0; - - for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) { - if (isBranch() && - (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) { - BranchTarget = opIndex; - } - else if (isMove()) { - if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource) - MoveSource = opIndex; - else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget) - MoveTarget = opIndex; - } - - EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex); - - Operands.push_back(operand); - } - - return ParseResult.setResult(0); -} - -int EDInst::branchTargetID() { - if (parseOperands()) - return -1; - return BranchTarget; -} - -int EDInst::moveSourceID() { - if (parseOperands()) - return -1; - return MoveSource; -} - -int EDInst::moveTargetID() { - if (parseOperands()) - return -1; - return MoveTarget; -} - -int EDInst::numOperands() { - if (parseOperands()) - return -1; - return Operands.size(); -} - -int EDInst::getOperand(EDOperand *&operand, unsigned int index) { - if (parseOperands()) - return -1; - - if (index >= Operands.size()) - return -1; - - operand = Operands[index]; - return 0; -} - -int EDInst::tokenize() { - if (TokenizeResult.valid()) - return TokenizeResult.result(); - - if (stringify()) - return TokenizeResult.setResult(-1); - - return TokenizeResult.setResult(EDToken::tokenize(Tokens, - String, - OperandOrder, - Disassembler)); - -} - -int EDInst::numTokens() { - if (tokenize()) - return -1; - return Tokens.size(); -} - -int EDInst::getToken(EDToken *&token, unsigned int index) { - if (tokenize()) - return -1; - token = Tokens[index]; - return 0; -} - -#ifdef __BLOCKS__ -int EDInst::visitTokens(EDTokenVisitor_t visitor) { - if (tokenize()) - return -1; - - tokvec_t::iterator iter; - - for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) { - int ret = visitor(*iter); - if (ret == 1) - return 0; - if (ret != 0) - return -1; - } - - return 0; -} -#endif diff --git a/tools/edis/EDInst.h b/tools/edis/EDInst.h deleted file mode 100644 index c8a747ff99f..00000000000 --- a/tools/edis/EDInst.h +++ /dev/null @@ -1,175 +0,0 @@ -//===-EDInst.h - LLVM Enhanced Disassembler ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the Enhanced Disassembly library's -// instruction class. The instruction is responsible for vending the string -// representation, individual tokens and operands for a single instruction. -// -//===----------------------------------------------------------------------===// - -#ifndef EDInst_ -#define EDInst_ - -#include "llvm-c/EnhancedDisassembly.h" - -#include "llvm/ADT/SmallVector.h" - -#include -#include - -namespace llvm { - struct EDInstInfo; -} - -/// CachedResult - Encapsulates the result of a function along with the validity -/// of that result, so that slow functions don't need to run twice -struct CachedResult { - /// True if the result has been obtained by executing the function - bool Valid; - /// The result last obtained from the function - int Result; - - /// Constructor - Initializes an invalid result - CachedResult() : Valid(false) { } - /// valid - Returns true if the result has been obtained by executing the - /// function and false otherwise - bool valid() { return Valid; } - /// result - Returns the result of the function or an undefined value if - /// valid() is false - int result() { return Result; } - /// setResult - Sets the result of the function and declares it valid - /// returning the result (so that setResult() can be called from inside a - /// return statement) - /// @arg result - The result of the function - int setResult(int result) { Result = result; Valid = true; return result; } -}; - -/// EDInst - Encapsulates a single instruction, which can be queried for its -/// string representation, as well as its operands and tokens -struct EDInst { - /// The parent disassembler - EDDisassembler &Disassembler; - /// The containing MCInst - llvm::MCInst *Inst; - /// The instruction information provided by TableGen for this instruction - const llvm::EDInstInfo *ThisInstInfo; - /// The number of bytes for the machine code representation of the instruction - uint64_t ByteSize; - - /// The result of the stringify() function - CachedResult StringifyResult; - /// The string representation of the instruction - std::string String; - /// The order in which operands from the InstInfo's operand information appear - /// in String - const char* OperandOrder; - - /// The result of the parseOperands() function - CachedResult ParseResult; - typedef llvm::SmallVector opvec_t; - /// The instruction's operands - opvec_t Operands; - /// The operand corresponding to the target, if the instruction is a branch - int BranchTarget; - /// The operand corresponding to the source, if the instruction is a move - int MoveSource; - /// The operand corresponding to the target, if the instruction is a move - int MoveTarget; - - /// The result of the tokenize() function - CachedResult TokenizeResult; - typedef std::vector tokvec_t; - /// The instruction's tokens - tokvec_t Tokens; - - /// Constructor - initializes an instruction given the output of the LLVM - /// C++ disassembler - /// - /// @arg inst - The MCInst, which will now be owned by this object - /// @arg byteSize - The size of the consumed instruction, in bytes - /// @arg disassembler - The parent disassembler - /// @arg instInfo - The instruction information produced by the table - /// generator for this instruction - EDInst(llvm::MCInst *inst, - uint64_t byteSize, - EDDisassembler &disassembler, - const llvm::EDInstInfo *instInfo); - ~EDInst(); - - /// byteSize - returns the number of bytes consumed by the machine code - /// representation of the instruction - uint64_t byteSize(); - /// instID - returns the LLVM instruction ID of the instruction - unsigned instID(); - - /// stringify - populates the String and AsmString members of the instruction, - /// returning 0 on success or -1 otherwise - int stringify(); - /// getString - retrieves a pointer to the string representation of the - /// instructinon, returning 0 on success or -1 otherwise - /// - /// @arg str - A reference to a pointer that, on success, is set to point to - /// the string representation of the instruction; this string is still owned - /// by the instruction and will be deleted when it is - int getString(const char *&str); - - /// isBranch - Returns true if the instruction is a branch - bool isBranch(); - /// isMove - Returns true if the instruction is a move - bool isMove(); - - /// parseOperands - populates the Operands member of the instruction, - /// returning 0 on success or -1 otherwise - int parseOperands(); - /// branchTargetID - returns the ID (suitable for use with getOperand()) of - /// the target operand if the instruction is a branch, or -1 otherwise - int branchTargetID(); - /// moveSourceID - returns the ID of the source operand if the instruction - /// is a move, or -1 otherwise - int moveSourceID(); - /// moveTargetID - returns the ID of the target operand if the instruction - /// is a move, or -1 otherwise - int moveTargetID(); - - /// numOperands - returns the number of operands available to retrieve, or -1 - /// on error - int numOperands(); - /// getOperand - retrieves an operand from the instruction's operand list by - /// index, returning 0 on success or -1 on error - /// - /// @arg operand - A reference whose target is pointed at the operand on - /// success, although the operand is still owned by the EDInst - /// @arg index - The index of the operand in the instruction - int getOperand(EDOperand *&operand, unsigned int index); - - /// tokenize - populates the Tokens member of the instruction, returning 0 on - /// success or -1 otherwise - int tokenize(); - /// numTokens - returns the number of tokens in the instruction, or -1 on - /// error - int numTokens(); - /// getToken - retrieves a token from the instruction's token list by index, - /// returning 0 on success or -1 on error - /// - /// @arg token - A reference whose target is pointed at the token on success, - /// although the token is still owned by the EDInst - /// @arg index - The index of the token in the instrcutino - int getToken(EDToken *&token, unsigned int index); - -#ifdef __BLOCKS__ - /// visitTokens - Visits each token in turn and applies a block to it, - /// returning 0 if all blocks are visited and/or the block signals - /// termination by returning 1; returns -1 on error - /// - /// @arg visitor - The visitor block to apply to all tokens. - int visitTokens(EDTokenVisitor_t visitor); -#endif -}; - -#endif diff --git a/tools/edis/EDMain.cpp b/tools/edis/EDMain.cpp index b6ca32f2db8..16855b3f45d 100644 --- a/tools/edis/EDMain.cpp +++ b/tools/edis/EDMain.cpp @@ -1,4 +1,4 @@ -//===-EDMain.cpp - LLVM Enhanced Disassembly C API ------------------------===// +//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===// // // The LLVM Compiler Infrastructure // @@ -11,33 +11,46 @@ // //===----------------------------------------------------------------------===// -#include "EDDisassembler.h" -#include "EDInst.h" -#include "EDOperand.h" -#include "EDToken.h" - +// FIXME: This code isn't layered right, the headers should be moved to +// include llvm/MC/MCDisassembler or something. +#include "../../lib/MC/MCDisassembler/EDDisassembler.h" +#include "../../lib/MC/MCDisassembler/EDInst.h" +#include "../../lib/MC/MCDisassembler/EDOperand.h" +#include "../../lib/MC/MCDisassembler/EDToken.h" #include "llvm-c/EnhancedDisassembly.h" +using namespace llvm; int EDGetDisassembler(EDDisassemblerRef *disassembler, const char *triple, EDAssemblySyntax_t syntax) { EDDisassembler::initialize(); - EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, - syntax); + EDDisassembler::AssemblySyntax Syntax; + switch (syntax) { + default: assert(0 && "Unknown assembly syntax!"); + case kEDAssemblySyntaxX86Intel: + Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel; + break; + case kEDAssemblySyntaxX86ATT: + Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT; + break; + case kEDAssemblySyntaxARMUAL: + Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL; + break; + } + + EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax); - if (ret) { - *disassembler = ret; - return 0; - } else { + if (!ret) return -1; - } + *disassembler = ret; + return 0; } int EDGetRegisterName(const char** regName, EDDisassemblerRef disassembler, unsigned regID) { - const char* name = disassembler->nameWithRegisterID(regID); + const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID); if (!name) return -1; *regName = name; @@ -46,24 +59,25 @@ int EDGetRegisterName(const char** regName, int EDRegisterIsStackPointer(EDDisassemblerRef disassembler, unsigned regID) { - return disassembler->registerIsStackPointer(regID) ? 1 : 0; + return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0; } int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler, unsigned regID) { - return disassembler->registerIsProgramCounter(regID) ? 1 : 0; + return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0; } unsigned int EDCreateInsts(EDInstRef *insts, unsigned int count, EDDisassemblerRef disassembler, - EDByteReaderCallback byteReader, + ::EDByteReaderCallback byteReader, uint64_t address, void *arg) { unsigned int index; for (index = 0; index < count; ++index) { - EDInst *inst = disassembler->createInst(byteReader, address, arg); + EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader, + address, arg); if (!inst) return index; @@ -76,163 +90,143 @@ unsigned int EDCreateInsts(EDInstRef *insts, } void EDReleaseInst(EDInstRef inst) { - delete inst; + delete ((EDInst*)inst); } int EDInstByteSize(EDInstRef inst) { - return inst->byteSize(); + return ((EDInst*)inst)->byteSize(); } int EDGetInstString(const char **buf, EDInstRef inst) { - return inst->getString(*buf); + return ((EDInst*)inst)->getString(*buf); } int EDInstID(unsigned *instID, EDInstRef inst) { - *instID = inst->instID(); + *instID = ((EDInst*)inst)->instID(); return 0; } int EDInstIsBranch(EDInstRef inst) { - return inst->isBranch(); + return ((EDInst*)inst)->isBranch(); } int EDInstIsMove(EDInstRef inst) { - return inst->isMove(); + return ((EDInst*)inst)->isMove(); } int EDBranchTargetID(EDInstRef inst) { - return inst->branchTargetID(); + return ((EDInst*)inst)->branchTargetID(); } int EDMoveSourceID(EDInstRef inst) { - return inst->moveSourceID(); + return ((EDInst*)inst)->moveSourceID(); } int EDMoveTargetID(EDInstRef inst) { - return inst->moveTargetID(); + return ((EDInst*)inst)->moveTargetID(); } int EDNumTokens(EDInstRef inst) { - return inst->numTokens(); + return ((EDInst*)inst)->numTokens(); } int EDGetToken(EDTokenRef *token, EDInstRef inst, int index) { - return inst->getToken(*token, index); + return ((EDInst*)inst)->getToken(*(EDToken**)token, index); } int EDGetTokenString(const char **buf, EDTokenRef token) { - return token->getString(*buf); + return ((EDToken*)token)->getString(*buf); } int EDOperandIndexForToken(EDTokenRef token) { - return token->operandID(); + return ((EDToken*)token)->operandID(); } int EDTokenIsWhitespace(EDTokenRef token) { - if (token->type() == EDToken::kTokenWhitespace) - return 1; - else - return 0; + return ((EDToken*)token)->type() == EDToken::kTokenWhitespace; } int EDTokenIsPunctuation(EDTokenRef token) { - if (token->type() == EDToken::kTokenPunctuation) - return 1; - else - return 0; + return ((EDToken*)token)->type() == EDToken::kTokenPunctuation; } int EDTokenIsOpcode(EDTokenRef token) { - if (token->type() == EDToken::kTokenOpcode) - return 1; - else - return 0; + return ((EDToken*)token)->type() == EDToken::kTokenOpcode; } int EDTokenIsLiteral(EDTokenRef token) { - if (token->type() == EDToken::kTokenLiteral) - return 1; - else - return 0; + return ((EDToken*)token)->type() == EDToken::kTokenLiteral; } int EDTokenIsRegister(EDTokenRef token) { - if (token->type() == EDToken::kTokenRegister) - return 1; - else - return 0; + return ((EDToken*)token)->type() == EDToken::kTokenRegister; } int EDTokenIsNegativeLiteral(EDTokenRef token) { - if (token->type() != EDToken::kTokenLiteral) + if (((EDToken*)token)->type() != EDToken::kTokenLiteral) return -1; - return token->literalSign(); + return ((EDToken*)token)->literalSign(); } -int EDLiteralTokenAbsoluteValue(uint64_t *value, - EDTokenRef token) { - if (token->type() != EDToken::kTokenLiteral) +int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) { + if (((EDToken*)token)->type() != EDToken::kTokenLiteral) return -1; - return token->literalAbsoluteValue(*value); + return ((EDToken*)token)->literalAbsoluteValue(*value); } int EDRegisterTokenValue(unsigned *registerID, EDTokenRef token) { - if (token->type() != EDToken::kTokenRegister) + if (((EDToken*)token)->type() != EDToken::kTokenRegister) return -1; - return token->registerID(*registerID); + return ((EDToken*)token)->registerID(*registerID); } int EDNumOperands(EDInstRef inst) { - return inst->numOperands(); + return ((EDInst*)inst)->numOperands(); } int EDGetOperand(EDOperandRef *operand, EDInstRef inst, int index) { - return inst->getOperand(*operand, index); + return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index); } int EDOperandIsRegister(EDOperandRef operand) { - return operand->isRegister(); + return ((EDOperand*)operand)->isRegister(); } int EDOperandIsImmediate(EDOperandRef operand) { - return operand->isImmediate(); + return ((EDOperand*)operand)->isImmediate(); } int EDOperandIsMemory(EDOperandRef operand) { - return operand->isMemory(); + return ((EDOperand*)operand)->isMemory(); } -int EDRegisterOperandValue(unsigned *value, - EDOperandRef operand) { - if (!operand->isRegister()) +int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) { + if (!((EDOperand*)operand)->isRegister()) return -1; - *value = operand->regVal(); + *value = ((EDOperand*)operand)->regVal(); return 0; } -int EDImmediateOperandValue(uint64_t *value, - EDOperandRef operand) { - if (!operand->isImmediate()) +int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) { + if (!((EDOperand*)operand)->isImmediate()) return -1; - *value = operand->immediateVal(); + *value = ((EDOperand*)operand)->immediateVal(); return 0; } -int EDEvaluateOperand(uint64_t *result, - EDOperandRef operand, - EDRegisterReaderCallback regReader, - void *arg) { - return operand->evaluate(*result, regReader, arg); +int EDEvaluateOperand(uint64_t *result, EDOperandRef operand, + ::EDRegisterReaderCallback regReader, void *arg) { + return ((EDOperand*)operand)->evaluate(*result, regReader, arg); } #ifdef __BLOCKS__ @@ -264,15 +258,13 @@ unsigned int EDBlockCreateInsts(EDInstRef *insts, (void*)&wrapper); } -int EDBlockEvaluateOperand(uint64_t *result, - EDOperandRef operand, +int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand, EDRegisterBlock_t regBlock) { - return operand->evaluate(*result, regBlock); + return ((EDOperand*)operand)->evaluate(*result, regBlock); } -int EDBlockVisitTokens(EDInstRef inst, - EDTokenVisitor_t visitor) { - return inst->visitTokens(visitor); +int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) { + return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor); } #else diff --git a/tools/edis/EDOperand.cpp b/tools/edis/EDOperand.cpp deleted file mode 100644 index d63c1c6bfbf..00000000000 --- a/tools/edis/EDOperand.cpp +++ /dev/null @@ -1,284 +0,0 @@ -//===-EDOperand.cpp - LLVM Enhanced Disassembler --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Enhanced Disassembly library's operand class. The -// operand is responsible for allowing evaluation given a particular register -// context. -// -//===----------------------------------------------------------------------===// - -#include "EDDisassembler.h" -#include "EDInst.h" -#include "EDOperand.h" - -#include "llvm/MC/EDInstInfo.h" -#include "llvm/MC/MCInst.h" - -using namespace llvm; - -EDOperand::EDOperand(const EDDisassembler &disassembler, - const EDInst &inst, - unsigned int opIndex, - unsigned int &mcOpIndex) : - Disassembler(disassembler), - Inst(inst), - OpIndex(opIndex), - MCOpIndex(mcOpIndex) { - unsigned int numMCOperands = 0; - - if (Disassembler.Key.Arch == Triple::x86 || - Disassembler.Key.Arch == Triple::x86_64) { - uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; - - switch (operandType) { - default: - break; - case kOperandTypeImmediate: - numMCOperands = 1; - break; - case kOperandTypeRegister: - numMCOperands = 1; - break; - case kOperandTypeX86Memory: - numMCOperands = 5; - break; - case kOperandTypeX86EffectiveAddress: - numMCOperands = 4; - break; - case kOperandTypeX86PCRelative: - numMCOperands = 1; - break; - } - } - else if (Disassembler.Key.Arch == Triple::arm || - Disassembler.Key.Arch == Triple::thumb) { - uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; - - switch (operandType) { - default: - case kOperandTypeARMRegisterList: - break; - case kOperandTypeImmediate: - case kOperandTypeRegister: - case kOperandTypeARMBranchTarget: - case kOperandTypeARMSoImm: - case kOperandTypeThumb2SoImm: - case kOperandTypeARMSoImm2Part: - case kOperandTypeARMPredicate: - case kOperandTypeThumbITMask: - case kOperandTypeThumb2AddrModeImm8Offset: - case kOperandTypeARMTBAddrMode: - case kOperandTypeThumb2AddrModeImm8s4Offset: - numMCOperands = 1; - break; - case kOperandTypeThumb2SoReg: - case kOperandTypeARMAddrMode2Offset: - case kOperandTypeARMAddrMode3Offset: - case kOperandTypeARMAddrMode4: - case kOperandTypeARMAddrMode5: - case kOperandTypeARMAddrModePC: - case kOperandTypeThumb2AddrModeImm8: - case kOperandTypeThumb2AddrModeImm12: - case kOperandTypeThumb2AddrModeImm8s4: - case kOperandTypeThumbAddrModeRR: - case kOperandTypeThumbAddrModeSP: - numMCOperands = 2; - break; - case kOperandTypeARMSoReg: - case kOperandTypeARMAddrMode2: - case kOperandTypeARMAddrMode3: - case kOperandTypeThumb2AddrModeSoReg: - case kOperandTypeThumbAddrModeS1: - case kOperandTypeThumbAddrModeS2: - case kOperandTypeThumbAddrModeS4: - case kOperandTypeARMAddrMode6Offset: - numMCOperands = 3; - break; - case kOperandTypeARMAddrMode6: - numMCOperands = 4; - break; - } - } - - mcOpIndex += numMCOperands; -} - -EDOperand::~EDOperand() { -} - -int EDOperand::evaluate(uint64_t &result, - EDRegisterReaderCallback callback, - void *arg) { - uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; - - switch (Disassembler.Key.Arch) { - default: - return -1; - case Triple::x86: - case Triple::x86_64: - switch (operandType) { - default: - return -1; - case kOperandTypeImmediate: - result = Inst.Inst->getOperand(MCOpIndex).getImm(); - return 0; - case kOperandTypeRegister: - { - unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); - return callback(&result, reg, arg); - } - case kOperandTypeX86PCRelative: - { - int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); - - uint64_t ripVal; - - // TODO fix how we do this - - if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg)) - return -1; - - result = ripVal + displacement; - return 0; - } - case kOperandTypeX86Memory: - case kOperandTypeX86EffectiveAddress: - { - unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg(); - uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm(); - unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg(); - int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm(); - //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); - - uint64_t addr = 0; - - if (baseReg) { - uint64_t baseVal; - if (callback(&baseVal, baseReg, arg)) - return -1; - addr += baseVal; - } - - if (indexReg) { - uint64_t indexVal; - if (callback(&indexVal, indexReg, arg)) - return -1; - addr += (scaleAmount * indexVal); - } - - addr += displacement; - - result = addr; - return 0; - } - } - break; - case Triple::arm: - case Triple::thumb: - switch (operandType) { - default: - return -1; - case kOperandTypeImmediate: - result = Inst.Inst->getOperand(MCOpIndex).getImm(); - return 0; - case kOperandTypeRegister: - { - unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); - return callback(&result, reg, arg); - } - case kOperandTypeARMBranchTarget: - { - int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); - - uint64_t pcVal; - - if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg)) - return -1; - - result = pcVal + displacement; - return 0; - } - } - } - - return -1; -} - -int EDOperand::isRegister() { - return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeRegister); -} - -unsigned EDOperand::regVal() { - return Inst.Inst->getOperand(MCOpIndex).getReg(); -} - -int EDOperand::isImmediate() { - return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeImmediate); -} - -uint64_t EDOperand::immediateVal() { - return Inst.Inst->getOperand(MCOpIndex).getImm(); -} - -int EDOperand::isMemory() { - uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; - - switch (operandType) { - default: - return 0; - case kOperandTypeX86Memory: - case kOperandTypeX86PCRelative: - case kOperandTypeX86EffectiveAddress: - case kOperandTypeARMSoReg: - case kOperandTypeARMSoImm: - case kOperandTypeARMAddrMode2: - case kOperandTypeARMAddrMode2Offset: - case kOperandTypeARMAddrMode3: - case kOperandTypeARMAddrMode3Offset: - case kOperandTypeARMAddrMode4: - case kOperandTypeARMAddrMode5: - case kOperandTypeARMAddrMode6: - case kOperandTypeARMAddrModePC: - case kOperandTypeARMBranchTarget: - case kOperandTypeThumbAddrModeS1: - case kOperandTypeThumbAddrModeS2: - case kOperandTypeThumbAddrModeS4: - case kOperandTypeThumbAddrModeRR: - case kOperandTypeThumbAddrModeSP: - case kOperandTypeThumb2SoImm: - case kOperandTypeThumb2AddrModeImm8: - case kOperandTypeThumb2AddrModeImm8Offset: - case kOperandTypeThumb2AddrModeImm12: - case kOperandTypeThumb2AddrModeSoReg: - case kOperandTypeThumb2AddrModeImm8s4: - return 1; - } -} - -#ifdef __BLOCKS__ -struct RegisterReaderWrapper { - EDRegisterBlock_t regBlock; -}; - -int readerWrapperCallback(uint64_t *value, - unsigned regID, - void *arg) { - struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg; - return wrapper->regBlock(value, regID); -} - -int EDOperand::evaluate(uint64_t &result, - EDRegisterBlock_t regBlock) { - struct RegisterReaderWrapper wrapper; - wrapper.regBlock = regBlock; - return evaluate(result, - readerWrapperCallback, - (void*)&wrapper); -} -#endif diff --git a/tools/edis/EDOperand.h b/tools/edis/EDOperand.h deleted file mode 100644 index ad9345b758b..00000000000 --- a/tools/edis/EDOperand.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the Enhanced Disassembly library's -// operand class. The operand is responsible for allowing evaluation given a -// particular register context. -// -//===----------------------------------------------------------------------===// - -#ifndef EDOperand_ -#define EDOperand_ - -#include "llvm-c/EnhancedDisassembly.h" - -/// EDOperand - Encapsulates a single operand, which can be evaluated by the -/// client -struct EDOperand { - /// The parent disassembler - const EDDisassembler &Disassembler; - /// The parent instruction - const EDInst &Inst; - - /// The index of the operand in the EDInst - unsigned int OpIndex; - /// The index of the first component of the operand in the MCInst - unsigned int MCOpIndex; - - /// Constructor - Initializes an EDOperand - /// - /// @arg disassembler - The disassembler responsible for the operand - /// @arg inst - The instruction containing this operand - /// @arg opIndex - The index of the operand in inst - /// @arg mcOpIndex - The index of the operand in the original MCInst - EDOperand(const EDDisassembler &disassembler, - const EDInst &inst, - unsigned int opIndex, - unsigned int &mcOpIndex); - ~EDOperand(); - - /// evaluate - Returns the numeric value of an operand to the extent possible, - /// returning 0 on success or -1 if there was some problem (such as a - /// register not being readable) - /// - /// @arg result - A reference whose target is filled in with the value of - /// the operand (the address if it is a memory operand) - /// @arg callback - A function to call to obtain register values - /// @arg arg - An opaque argument to pass to callback - int evaluate(uint64_t &result, - EDRegisterReaderCallback callback, - void *arg); - - /// isRegister - Returns 1 if the operand is a register or 0 otherwise - int isRegister(); - /// regVal - Returns the register value. - unsigned regVal(); - - /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise - int isImmediate(); - /// immediateVal - Returns the immediate value. - uint64_t immediateVal(); - - /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise - int isMemory(); - -#ifdef __BLOCKS__ - /// evaluate - Like evaluate for a callback, but uses a block instead - int evaluate(uint64_t &result, - EDRegisterBlock_t regBlock); -#endif -}; - -#endif diff --git a/tools/edis/EDToken.cpp b/tools/edis/EDToken.cpp deleted file mode 100644 index 3bcb0a14b8c..00000000000 --- a/tools/edis/EDToken.cpp +++ /dev/null @@ -1,208 +0,0 @@ -//===-EDToken.cpp - LLVM Enhanced Disassembler ----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Enhanced Disassembler library's token class. The -// token is responsible for vending information about the token, such as its -// type and logical value. -// -//===----------------------------------------------------------------------===// - -#include "EDDisassembler.h" -#include "EDToken.h" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" - -using namespace llvm; - -EDToken::EDToken(StringRef str, - enum tokenType type, - uint64_t localType, - EDDisassembler &disassembler) : - Disassembler(disassembler), - Str(str), - Type(type), - LocalType(localType), - OperandID(-1) { -} - -EDToken::~EDToken() { -} - -void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) { - Type = kTokenLiteral; - LiteralSign = sign; - LiteralAbsoluteValue = absoluteValue; -} - -void EDToken::makeRegister(unsigned registerID) { - Type = kTokenRegister; - RegisterID = registerID; -} - -void EDToken::setOperandID(int operandID) { - OperandID = operandID; -} - -enum EDToken::tokenType EDToken::type() const { - return Type; -} - -uint64_t EDToken::localType() const { - return LocalType; -} - -StringRef EDToken::string() const { - return Str; -} - -int EDToken::operandID() const { - return OperandID; -} - -int EDToken::literalSign() const { - if (Type != kTokenLiteral) - return -1; - return (LiteralSign ? 1 : 0); -} - -int EDToken::literalAbsoluteValue(uint64_t &value) const { - if (Type != kTokenLiteral) - return -1; - value = LiteralAbsoluteValue; - return 0; -} - -int EDToken::registerID(unsigned ®isterID) const { - if (Type != kTokenRegister) - return -1; - registerID = RegisterID; - return 0; -} - -int EDToken::tokenize(std::vector &tokens, - std::string &str, - const char *operandOrder, - EDDisassembler &disassembler) { - SmallVector parsedOperands; - SmallVector asmTokens; - - if (disassembler.parseInst(parsedOperands, asmTokens, str)) - return -1; - - SmallVectorImpl::iterator operandIterator; - unsigned int operandIndex; - SmallVectorImpl::iterator tokenIterator; - - operandIterator = parsedOperands.begin(); - operandIndex = 0; - - bool readOpcode = false; - - const char *wsPointer = asmTokens.begin()->getLoc().getPointer(); - - for (tokenIterator = asmTokens.begin(); - tokenIterator != asmTokens.end(); - ++tokenIterator) { - SMLoc tokenLoc = tokenIterator->getLoc(); - - const char *tokenPointer = tokenLoc.getPointer(); - - if (tokenPointer > wsPointer) { - unsigned long wsLength = tokenPointer - wsPointer; - - EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength), - EDToken::kTokenWhitespace, - 0, - disassembler); - - tokens.push_back(whitespaceToken); - } - - wsPointer = tokenPointer + tokenIterator->getString().size(); - - while (operandIterator != parsedOperands.end() && - tokenLoc.getPointer() > - (*operandIterator)->getEndLoc().getPointer()) { - ++operandIterator; - ++operandIndex; - } - - EDToken *token; - - switch (tokenIterator->getKind()) { - case AsmToken::Identifier: - if (!readOpcode) { - token = new EDToken(tokenIterator->getString(), - EDToken::kTokenOpcode, - (uint64_t)tokenIterator->getKind(), - disassembler); - readOpcode = true; - break; - } - // any identifier that isn't an opcode is mere punctuation; so we fall - // through - default: - token = new EDToken(tokenIterator->getString(), - EDToken::kTokenPunctuation, - (uint64_t)tokenIterator->getKind(), - disassembler); - break; - case AsmToken::Integer: - { - token = new EDToken(tokenIterator->getString(), - EDToken::kTokenLiteral, - (uint64_t)tokenIterator->getKind(), - disassembler); - - int64_t intVal = tokenIterator->getIntVal(); - - if (intVal < 0) - token->makeLiteral(true, -intVal); - else - token->makeLiteral(false, intVal); - break; - } - case AsmToken::Register: - { - token = new EDToken(tokenIterator->getString(), - EDToken::kTokenLiteral, - (uint64_t)tokenIterator->getKind(), - disassembler); - - token->makeRegister((unsigned)tokenIterator->getRegVal()); - break; - } - } - - if (operandIterator != parsedOperands.end() && - tokenLoc.getPointer() >= - (*operandIterator)->getStartLoc().getPointer()) { - /// operandIndex == 0 means the operand is the instruction (which the - /// AsmParser treats as an operand but edis does not). We therefore skip - /// operandIndex == 0 and subtract 1 from all other operand indices. - - if (operandIndex > 0) - token->setOperandID(operandOrder[operandIndex - 1]); - } - - tokens.push_back(token); - } - - return 0; -} - -int EDToken::getString(const char*& buf) { - if (PermStr.length() == 0) { - PermStr = Str.str(); - } - buf = PermStr.c_str(); - return 0; -} diff --git a/tools/edis/EDToken.h b/tools/edis/EDToken.h deleted file mode 100644 index e4ae91f7ec3..00000000000 --- a/tools/edis/EDToken.h +++ /dev/null @@ -1,135 +0,0 @@ -//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the Enhanced Disassembly library's token -// class. The token is responsible for vending information about the token, -// such as its type and logical value. -// -//===----------------------------------------------------------------------===// - -#ifndef EDToken_ -#define EDToken_ - -#include "llvm-c/EnhancedDisassembly.h" -#include "llvm/ADT/StringRef.h" - -#include -#include - -/// EDToken - Encapsulates a single token, which can provide a string -/// representation of itself or interpret itself in various ways, depending -/// on the token type. -struct EDToken { - enum tokenType { - kTokenWhitespace, - kTokenOpcode, - kTokenLiteral, - kTokenRegister, - kTokenPunctuation - }; - - /// The parent disassembler - EDDisassembler &Disassembler; - - /// The token's string representation - llvm::StringRef Str; - /// The token's string representation, but in a form suitable for export - std::string PermStr; - /// The type of the token, as exposed through the external API - enum tokenType Type; - /// The type of the token, as recorded by the syntax-specific tokenizer - uint64_t LocalType; - /// The operand corresponding to the token, or (unsigned int)-1 if not - /// part of an operand. - int OperandID; - - /// The sign if the token is a literal (1 if negative, 0 otherwise) - bool LiteralSign; - /// The absolute value if the token is a literal - uint64_t LiteralAbsoluteValue; - /// The LLVM register ID if the token is a register name - unsigned RegisterID; - - /// Constructor - Initializes an EDToken with the information common to all - /// tokens - /// - /// @arg str - The string corresponding to the token - /// @arg type - The token's type as exposed through the public API - /// @arg localType - The token's type as recorded by the tokenizer - /// @arg disassembler - The disassembler responsible for the token - EDToken(llvm::StringRef str, - enum tokenType type, - uint64_t localType, - EDDisassembler &disassembler); - - /// makeLiteral - Adds the information specific to a literal - /// @arg sign - The sign of the literal (1 if negative, 0 - /// otherwise) - /// - /// @arg absoluteValue - The absolute value of the literal - void makeLiteral(bool sign, uint64_t absoluteValue); - /// makeRegister - Adds the information specific to a register - /// - /// @arg registerID - The LLVM register ID - void makeRegister(unsigned registerID); - - /// setOperandID - Links the token to a numbered operand - /// - /// @arg operandID - The operand ID to link to - void setOperandID(int operandID); - - ~EDToken(); - - /// type - Returns the public type of the token - enum tokenType type() const; - /// localType - Returns the tokenizer-specific type of the token - uint64_t localType() const; - /// string - Returns the string representation of the token - llvm::StringRef string() const; - /// operandID - Returns the operand ID of the token - int operandID() const; - - /// literalSign - Returns the sign of the token - /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal) - int literalSign() const; - /// literalAbsoluteValue - Retrieves the absolute value of the token, and - /// returns -1 if the token is not a literal - /// @arg value - A reference to a value that is filled in with the absolute - /// value, if it is valid - int literalAbsoluteValue(uint64_t &value) const; - /// registerID - Retrieves the register ID of the token, and returns -1 if the - /// token is not a register - /// - /// @arg registerID - A reference to a value that is filled in with the - /// register ID, if it is valid - int registerID(unsigned ®isterID) const; - - /// tokenize - Tokenizes a string using the platform- and syntax-specific - /// tokenizer, and returns 0 on success (-1 on failure) - /// - /// @arg tokens - A vector that will be filled in with pointers to - /// allocated tokens - /// @arg str - The string, as outputted by the AsmPrinter - /// @arg operandOrder - The order of the operands from the operandFlags array - /// as they appear in str - /// @arg disassembler - The disassembler for the desired target and - // assembly syntax - static int tokenize(std::vector &tokens, - std::string &str, - const char *operandOrder, - EDDisassembler &disassembler); - - /// getString - Directs a character pointer to the string, returning 0 on - /// success (-1 on failure) - /// @arg buf - A reference to a pointer that is set to point to the string. - /// The string is still owned by the token. - int getString(const char*& buf); -}; - -#endif diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt index 8b61a4e8db1..805caf403a1 100644 --- a/tools/llvm-mc/CMakeLists.txt +++ b/tools/llvm-mc/CMakeLists.txt @@ -1,5 +1,4 @@ -set( LLVM_USED_LIBS EnhancedDisassembly) -set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser) +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser MCDisassembler) add_llvm_tool(llvm-mc llvm-mc.cpp diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp index 37b2cb80563..7c9d7456f39 100644 --- a/tools/llvm-mc/Disassembler.cpp +++ b/tools/llvm-mc/Disassembler.cpp @@ -13,21 +13,21 @@ //===----------------------------------------------------------------------===// #include "Disassembler.h" - -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/Triple.h" +#include "../../lib/MC/MCDisassembler/EDDisassembler.h" +#include "../../lib/MC/MCDisassembler/EDInst.h" +#include "../../lib/MC/MCDisassembler/EDOperand.h" +#include "../../lib/MC/MCDisassembler/EDToken.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" - -#include "llvm-c/EnhancedDisassembly.h" - using namespace llvm; typedef std::vector > ByteArrayTy; @@ -179,21 +179,17 @@ static int byteArrayReader(uint8_t *B, uint64_t A, void *Arg) { } static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) { - EDDisassemblerRef &disassembler = *((EDDisassemblerRef*)Arg); - - const char *regName; + EDDisassembler &disassembler = *((EDDisassembler *)Arg); - if (!EDGetRegisterName(®Name, - disassembler, - R)) + if (const char *regName = disassembler.nameWithRegisterID(R)) outs() << "[" << regName << "/" << R << "]"; - if (EDRegisterIsStackPointer(disassembler, R)) + + if (disassembler.registerIsStackPointer(R)) outs() << "(sp)"; - if (EDRegisterIsProgramCounter(disassembler, R)) + if (disassembler.registerIsProgramCounter(R)) outs() << "(pc)"; *V = 0; - return 0; } @@ -209,10 +205,8 @@ int Disassembler::disassembleEnhanced(const std::string &TS, return -1; } - EDDisassemblerRef disassembler; - Triple T(TS); - EDAssemblySyntax_t AS; + EDDisassembler::AssemblySyntax AS; switch (T.getArch()) { default: @@ -220,90 +214,82 @@ int Disassembler::disassembleEnhanced(const std::string &TS, return -1; case Triple::arm: case Triple::thumb: - AS = kEDAssemblySyntaxARMUAL; + AS = EDDisassembler::kEDAssemblySyntaxARMUAL; break; case Triple::x86: case Triple::x86_64: - AS = kEDAssemblySyntaxX86ATT; + AS = EDDisassembler::kEDAssemblySyntaxX86ATT; break; } - if (EDGetDisassembler(&disassembler, - TS.c_str(), - AS)) { - errs() << "error: couldn't get disassembler for " << TS.c_str() << "\n"; + EDDisassembler::initialize(); + EDDisassembler *disassembler = + EDDisassembler::getDisassembler(TS.c_str(), AS); + + if (disassembler == 0) { + errs() << "error: couldn't get disassembler for " << TS << '\n'; return -1; } - EDInstRef inst; - - if (EDCreateInsts(&inst, 1, disassembler, byteArrayReader, 0,&ByteArray) - != 1) { + EDInst *inst = + disassembler->createInst(byteArrayReader, 0, &ByteArray); + + if (inst == 0) { errs() << "error: Didn't get an instruction\n"; return -1; } - int numTokens = EDNumTokens(inst); - - if (numTokens < 0) { - errs() << "error: Couldn't count the instruction's tokens\n"; + unsigned numTokens = inst->numTokens(); + if ((int)numTokens < 0) { + errs() << "error: couldn't count the instruction's tokens\n"; return -1; } - int tokenIndex; - - for (tokenIndex = 0; tokenIndex < numTokens; ++tokenIndex) { - EDTokenRef token; + for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) { + EDToken *token; - if (EDGetToken(&token, inst, tokenIndex)) { + if (inst->getToken(token, tokenIndex)) { errs() << "error: Couldn't get token\n"; return -1; } const char *buf; - - if (EDGetTokenString(&buf, token)) { + if (token->getString(buf)) { errs() << "error: Couldn't get string for token\n"; return -1; } - outs() << "["; - - int operandIndex = EDOperandIndexForToken(token); + outs() << '['; + int operandIndex = token->operandID(); if (operandIndex >= 0) outs() << operandIndex << "-"; - if (EDTokenIsWhitespace(token)) { - outs() << "w"; - } else if (EDTokenIsPunctuation(token)) { - outs() << "p"; - } else if (EDTokenIsOpcode(token)) { - outs() << "o"; - } else if (EDTokenIsLiteral(token)) { - outs() << "l"; - } else if (EDTokenIsRegister(token)) { - outs() << "r"; - } else { - outs() << "?"; + switch (token->type()) { + default: outs() << "?"; break; + case EDToken::kTokenWhitespace: outs() << "w"; break; + case EDToken::kTokenPunctuation: outs() << "p"; break; + case EDToken::kTokenOpcode: outs() << "o"; break; + case EDToken::kTokenLiteral: outs() << "l"; break; + case EDToken::kTokenRegister: outs() << "r"; break; } outs() << ":" << buf; - if (EDTokenIsLiteral(token)) { + if (token->type() == EDToken::kTokenLiteral) { outs() << "="; - if (EDTokenIsNegativeLiteral(token)) + if (token->literalSign()) outs() << "-"; uint64_t absoluteValue; - if (EDLiteralTokenAbsoluteValue(&absoluteValue, token)) { + if (token->literalAbsoluteValue(absoluteValue)) { errs() << "error: Couldn't get the value of a literal token\n"; return -1; } outs() << absoluteValue; - } else if (EDTokenIsRegister(token)) { + } else if (token->type() == EDToken::kTokenRegister) { outs() << "="; unsigned regID; - if (EDRegisterTokenValue(®ID, token)) { + if (token->registerID(regID)) { errs() << "error: Couldn't get the ID of a register token\n"; return -1; } @@ -315,45 +301,34 @@ int Disassembler::disassembleEnhanced(const std::string &TS, outs() << " "; - if (EDInstIsBranch(inst)) + if (inst->isBranch()) outs() << "
"; - if (EDInstIsMove(inst)) + if (inst->isMove()) outs() << " "; - int numOperands = EDNumOperands(inst); + unsigned numOperands = inst->numOperands(); - if (numOperands < 0) { + if ((int)numOperands < 0) { errs() << "error: Couldn't count operands\n"; return -1; } - int operandIndex; - - for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) { + for (unsigned operandIndex = 0; operandIndex != numOperands; ++operandIndex) { outs() << operandIndex << ":"; - EDOperandRef operand; - - if (EDGetOperand(&operand, - inst, - operandIndex)) { - errs() << "error: Couldn't get operand\n"; + EDOperand *operand; + if (inst->getOperand(operand, operandIndex)) { + errs() << "error: couldn't get operand\n"; return -1; } uint64_t evaluatedResult; - - EDEvaluateOperand(&evaluatedResult, - operand, - verboseEvaluator, - &disassembler); - - outs() << "=" << evaluatedResult; - - outs() << " "; + evaluatedResult = operand->evaluate(evaluatedResult, verboseEvaluator, + disassembler); + outs() << "=" << evaluatedResult << " "; } - outs() << "\n"; + outs() << '\n'; return 0; } diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile index a1274932495..934a6e4dd08 100644 --- a/tools/llvm-mc/Makefile +++ b/tools/llvm-mc/Makefile @@ -18,9 +18,7 @@ TOOL_NO_EXPORTS = 1 # early so we can set up LINK_COMPONENTS before including Makefile.rules include $(LEVEL)/Makefile.config -LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCParser MC support +LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCDisassembler MCParser MC support include $(LLVM_SRC_ROOT)/Makefile.rules -# Using LIBS instead of USEDLIBS to force static linking -LIBS += $(LLVMLibDir)/libEnhancedDisassembly.a