add_subdirectory(lib/Analysis/IPA)
add_subdirectory(lib/MC)
add_subdirectory(lib/MC/MCParser)
+add_subdirectory(lib/MC/MCDisassembler)
add_subdirectory(test)
add_subdirectory(utils/FileCheck)
@typedef EDAssemblySyntax_t
An assembly syntax for use in tokenizing instructions.
*/
-typedef enum {
+enum {
/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
kEDAssemblySyntaxX86Intel = 0,
/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
kEDAssemblySyntaxX86ATT = 1,
kEDAssemblySyntaxARMUAL = 2
-} EDAssemblySyntax_t;
+};
+typedef unsigned EDAssemblySyntax_t;
/*!
@typedef EDDisassemblerRef
Encapsulates a disassembler for a single CPU architecture.
*/
-struct EDDisassembler;
-typedef struct EDDisassembler *EDDisassemblerRef;
+typedef void *EDDisassemblerRef;
/*!
@typedef EDInstRef
Encapsulates a single disassembled instruction in one assembly syntax.
*/
-struct EDInst;
-typedef struct EDInst *EDInstRef;
+typedef void *EDInstRef;
/*!
@typedef EDTokenRef
Encapsulates a token from the disassembly of an instruction.
*/
-struct EDToken;
-typedef struct EDToken *EDTokenRef;
+typedef void *EDTokenRef;
/*!
@typedef EDOperandRef
Encapsulates an operand of an instruction.
*/
-struct EDOperand;
-typedef struct EDOperand *EDOperandRef;
+typedef void *EDOperandRef;
/*!
@functiongroup Getting a disassembler
--- /dev/null
+//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's disassembler class.
+// The disassembler is responsible for vending individual instructions according
+// to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelect.h"
+using namespace llvm;
+
+bool EDDisassembler::sInitialized = false;
+EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
+
+struct TripleMap {
+ Triple::ArchType Arch;
+ const char *String;
+};
+
+static struct TripleMap triplemap[] = {
+ { Triple::x86, "i386-unknown-unknown" },
+ { Triple::x86_64, "x86_64-unknown-unknown" },
+ { Triple::arm, "arm-unknown-unknown" },
+ { Triple::thumb, "thumb-unknown-unknown" },
+ { Triple::InvalidArch, NULL, }
+};
+
+/// infoFromArch - Returns the TripleMap corresponding to a given architecture,
+/// or NULL if there is an error
+///
+/// @arg arch - The Triple::ArchType for the desired architecture
+static const char *tripleFromArch(Triple::ArchType arch) {
+ unsigned int infoIndex;
+
+ for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
+ if (arch == triplemap[infoIndex].Arch)
+ return triplemap[infoIndex].String;
+ }
+
+ return NULL;
+}
+
+/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
+/// for the desired assembly syntax, suitable for passing to
+/// Target::createMCInstPrinter()
+///
+/// @arg arch - The target architecture
+/// @arg syntax - The assembly syntax in sd form
+static int getLLVMSyntaxVariant(Triple::ArchType arch,
+ EDDisassembler::AssemblySyntax syntax) {
+ switch (syntax) {
+ default:
+ return -1;
+ // Mappings below from X86AsmPrinter.cpp
+ case EDDisassembler::kEDAssemblySyntaxX86ATT:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 0;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxX86Intel:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 1;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxARMUAL:
+ if (arch == Triple::arm || arch == Triple::thumb)
+ return 0;
+ else
+ return -1;
+ }
+}
+
+void EDDisassembler::initialize() {
+ if (sInitialized)
+ return;
+
+ sInitialized = true;
+
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+ InitializeAllDisassemblers();
+}
+
+#undef BRINGUP_TARGET
+
+EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
+ AssemblySyntax syntax) {
+ CPUKey key;
+ key.Arch = arch;
+ key.Syntax = syntax;
+
+ EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
+
+ if (i != sDisassemblers.end()) {
+ return i->second;
+ } else {
+ EDDisassembler* sdd = new EDDisassembler(key);
+ if (!sdd->valid()) {
+ delete sdd;
+ return NULL;
+ }
+
+ sDisassemblers[key] = sdd;
+
+ return sdd;
+ }
+
+ return NULL;
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+ AssemblySyntax syntax) {
+ return getDisassembler(Triple(str).getArch(), syntax);
+}
+
+EDDisassembler::EDDisassembler(CPUKey &key) :
+ Valid(false),
+ HasSemantics(false),
+ ErrorStream(nulls()),
+ Key(key) {
+ const char *triple = tripleFromArch(key.Arch);
+
+ if (!triple)
+ return;
+
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+
+ if (LLVMSyntaxVariant < 0)
+ return;
+
+ std::string tripleString(triple);
+ std::string errorString;
+
+ Tgt = TargetRegistry::lookupTarget(tripleString,
+ errorString);
+
+ if (!Tgt)
+ return;
+
+ std::string featureString;
+
+ TargetMachine.reset(Tgt->createTargetMachine(tripleString,
+ featureString));
+
+ const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
+
+ if (!registerInfo)
+ return;
+
+ initMaps(*registerInfo);
+
+ AsmInfo.reset(Tgt->createAsmInfo(tripleString));
+
+ if (!AsmInfo)
+ return;
+
+ Disassembler.reset(Tgt->createMCDisassembler());
+
+ if (!Disassembler)
+ return;
+
+ InstInfos = Disassembler->getEDInfo();
+
+ InstString.reset(new std::string);
+ InstStream.reset(new raw_string_ostream(*InstString));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
+
+ if (!InstPrinter)
+ return;
+
+ GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
+ SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
+ SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
+
+ initMaps(*TargetMachine->getRegisterInfo());
+
+ Valid = true;
+}
+
+EDDisassembler::~EDDisassembler() {
+ if (!valid())
+ return;
+}
+
+namespace {
+ /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
+ /// as provided by the sd interface. See MemoryObject.
+ class EDMemoryObject : public llvm::MemoryObject {
+ private:
+ EDByteReaderCallback Callback;
+ void *Arg;
+ public:
+ EDMemoryObject(EDByteReaderCallback callback,
+ void *arg) : Callback(callback), Arg(arg) { }
+ ~EDMemoryObject() { }
+ uint64_t getBase() const { return 0x0; }
+ uint64_t getExtent() const { return (uint64_t)-1; }
+ int readByte(uint64_t address, uint8_t *ptr) const {
+ if (!Callback)
+ return -1;
+
+ if (Callback(ptr, address, Arg))
+ return -1;
+
+ return 0;
+ }
+ };
+}
+
+EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ EDMemoryObject memoryObject(byteReader, arg);
+
+ MCInst* inst = new MCInst;
+ uint64_t byteSize;
+
+ if (!Disassembler->getInstruction(*inst,
+ byteSize,
+ memoryObject,
+ address,
+ ErrorStream)) {
+ delete inst;
+ return NULL;
+ } else {
+ const llvm::EDInstInfo *thisInstInfo;
+
+ thisInstInfo = &InstInfos[inst->getOpcode()];
+
+ EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
+ return sdInst;
+ }
+}
+
+void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) {
+ unsigned numRegisters = registerInfo.getNumRegs();
+ unsigned registerIndex;
+
+ for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
+ const char* registerName = registerInfo.get(registerIndex).Name;
+
+ RegVec.push_back(registerName);
+ RegRMap[registerName] = registerIndex;
+ }
+
+ switch (Key.Arch) {
+ default:
+ break;
+ case Triple::x86:
+ case Triple::x86_64:
+ stackPointers.insert(registerIDWithName("SP"));
+ stackPointers.insert(registerIDWithName("ESP"));
+ stackPointers.insert(registerIDWithName("RSP"));
+
+ programCounters.insert(registerIDWithName("IP"));
+ programCounters.insert(registerIDWithName("EIP"));
+ programCounters.insert(registerIDWithName("RIP"));
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ stackPointers.insert(registerIDWithName("SP"));
+
+ programCounters.insert(registerIDWithName("PC"));
+ break;
+ }
+}
+
+const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
+ if (registerID >= RegVec.size())
+ return NULL;
+ else
+ return RegVec[registerID].c_str();
+}
+
+unsigned EDDisassembler::registerIDWithName(const char *name) const {
+ regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
+ if (iter == RegRMap.end())
+ return 0;
+ else
+ return (*iter).second;
+}
+
+bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
+ return (stackPointers.find(registerID) != stackPointers.end());
+}
+
+bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
+ return (programCounters.find(registerID) != programCounters.end());
+}
+
+int EDDisassembler::printInst(std::string &str, MCInst &inst) {
+ PrinterMutex.acquire();
+
+ InstPrinter->printInst(&inst, *InstStream);
+ InstStream->flush();
+ str = *InstString;
+ InstString->clear();
+
+ PrinterMutex.release();
+
+ return 0;
+}
+
+int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
+ SmallVectorImpl<AsmToken> &tokens,
+ const std::string &str) {
+ int ret = 0;
+
+ switch (Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::arm:
+ case Triple::thumb:
+ break;
+ }
+
+ const char *cStr = str.c_str();
+ MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
+
+ StringRef instName;
+ SMLoc instLoc;
+
+ SourceMgr sourceMgr;
+ sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
+ MCContext context(*AsmInfo);
+ OwningPtr<MCStreamer> streamer(createNullStreamer(context));
+ OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
+ context, *streamer,
+ *AsmInfo));
+ OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser,
+ *TargetMachine));
+
+ AsmToken OpcodeToken = genericParser->Lex();
+ AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
+
+ if (OpcodeToken.is(AsmToken::Identifier)) {
+ instName = OpcodeToken.getString();
+ instLoc = OpcodeToken.getLoc();
+
+ if (NextToken.isNot(AsmToken::Eof) &&
+ TargetParser->ParseInstruction(instName, instLoc, operands))
+ ret = -1;
+ } else {
+ ret = -1;
+ }
+
+ ParserMutex.acquire();
+
+ if (!ret) {
+ GenericAsmLexer->setBuffer(buf);
+
+ while (SpecificAsmLexer->Lex(),
+ SpecificAsmLexer->isNot(AsmToken::Eof) &&
+ SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
+ if (SpecificAsmLexer->is(AsmToken::Error)) {
+ ret = -1;
+ break;
+ }
+ tokens.push_back(SpecificAsmLexer->getTok());
+ }
+ }
+
+ ParserMutex.release();
+
+ return ret;
+}
+
+int EDDisassembler::llvmSyntaxVariant() const {
+ return LLVMSyntaxVariant;
+}
--- /dev/null
+//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// disassembler class. The disassembler is responsible for vending individual
+// instructions according to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
+
+#include "EDInfo.inc"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetMachine;
+class TargetRegisterInfo;
+
+struct EDInstInfo;
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+ typedef enum {
+ /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86Intel = 0,
+ /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86ATT = 1,
+ kEDAssemblySyntaxARMUAL = 2
+ } AssemblySyntax;
+
+
+ ////////////////////
+ // Static members //
+ ////////////////////
+
+ /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
+ /// pair
+ struct CPUKey {
+ /// The architecture type
+ llvm::Triple::ArchType Arch;
+
+ /// The assembly syntax
+ AssemblySyntax Syntax;
+
+ /// operator== - Equality operator
+ bool operator==(const CPUKey &key) const {
+ return (Arch == key.Arch &&
+ Syntax == key.Syntax);
+ }
+
+ /// operator< - Less-than operator
+ bool operator<(const CPUKey &key) const {
+ if(Arch > key.Arch)
+ return false;
+ if(Syntax >= key.Syntax)
+ return false;
+ return true;
+ }
+ };
+
+ typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+ /// True if the disassembler registry has been initialized; false if not
+ static bool sInitialized;
+ /// A map from disassembler specifications to disassemblers. Populated
+ /// lazily.
+ static DisassemblerMap_t sDisassemblers;
+
+ /// getDisassembler - Returns the specified disassemble, or NULL on failure
+ ///
+ /// @arg arch - The desired architecture
+ /// @arg syntax - The desired disassembly syntax
+ static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
+ AssemblySyntax syntax);
+
+ /// getDisassembler - Returns the disassembler for a given combination of
+ /// CPU type, CPU subtype, and assembly syntax, or NULL on failure
+ ///
+ /// @arg str - The string representation of the architecture triple, e.g.,
+ /// "x86_64-apple-darwin"
+ /// @arg syntax - The disassembly syntax for the required disassembler
+ static EDDisassembler *getDisassembler(llvm::StringRef str,
+ AssemblySyntax syntax);
+
+ /// initialize - Initializes the disassembler registry and the LLVM backend
+ static void initialize();
+
+ ////////////////////////
+ // Per-object members //
+ ////////////////////////
+
+ /// True only if the object has been successfully initialized
+ bool Valid;
+ /// True if the disassembler can provide semantic information
+ bool HasSemantics;
+
+ /// The stream to write errors to
+ llvm::raw_ostream &ErrorStream;
+
+ /// The architecture/syntax pair for the current architecture
+ CPUKey Key;
+ /// The LLVM target corresponding to the disassembler
+ const llvm::Target *Tgt;
+ /// The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
+ /// The assembly information for the target architecture
+ llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+ /// The disassembler for the target architecture
+ llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
+ /// The output string for the instruction printer; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<std::string> InstString;
+ /// The output stream for the disassembler; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
+ /// The instruction printer for the target architecture; must be guarded with
+ /// PrinterMutex when printing
+ llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
+ /// The mutex that guards the instruction printer's printing functions, which
+ /// use a shared stream
+ llvm::sys::Mutex PrinterMutex;
+ /// The array of instruction information provided by the TableGen backend for
+ /// the target architecture
+ const llvm::EDInstInfo *InstInfos;
+ /// The target-specific lexer for use in tokenizing strings, in
+ /// target-independent and target-specific portions
+ llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
+ llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+ /// The guard for the above
+ llvm::sys::Mutex ParserMutex;
+ /// The LLVM number used for the target disassembly syntax variant
+ int LLVMSyntaxVariant;
+
+ typedef std::vector<std::string> regvec_t;
+ typedef std::map<std::string, unsigned> regrmap_t;
+
+ /// A vector of registers for quick mapping from LLVM register IDs to names
+ regvec_t RegVec;
+ /// A map of registers for quick mapping from register names to LLVM IDs
+ regrmap_t RegRMap;
+
+ /// A set of register IDs for aliases of the stack pointer for the current
+ /// architecture
+ std::set<unsigned> stackPointers;
+ /// A set of register IDs for aliases of the program counter for the current
+ /// architecture
+ std::set<unsigned> programCounters;
+
+ /// Constructor - initializes a disassembler with all the necessary objects,
+ /// which come pre-allocated from the registry accessor function
+ ///
+ /// @arg key - the architecture and disassembly syntax for the
+ /// disassembler
+ EDDisassembler(CPUKey& key);
+
+ /// valid - reports whether there was a failure in the constructor.
+ bool valid() {
+ return Valid;
+ }
+
+ /// hasSemantics - reports whether the disassembler can provide operands and
+ /// tokens.
+ bool hasSemantics() {
+ return HasSemantics;
+ }
+
+ ~EDDisassembler();
+
+ /// createInst - creates and returns an instruction given a callback and
+ /// memory address, or NULL on failure
+ ///
+ /// @arg byteReader - A callback function that provides machine code bytes
+ /// @arg address - The address of the first byte of the instruction,
+ /// suitable for passing to byteReader
+ /// @arg arg - An opaque argument for byteReader
+ EDInst *createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg);
+
+ /// initMaps - initializes regVec and regRMap using the provided register
+ /// info
+ ///
+ /// @arg registerInfo - the register information to use as a source
+ void initMaps(const llvm::TargetRegisterInfo ®isterInfo);
+ /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
+ /// register for a given register ID, or NULL on failure
+ ///
+ /// @arg registerID - the ID of the register to be queried
+ const char *nameWithRegisterID(unsigned registerID) const;
+ /// registerIDWithName - Returns the ID of a register for a given register
+ /// name, or (unsigned)-1 on failure
+ ///
+ /// @arg name - The name of the register
+ unsigned registerIDWithName(const char *name) const;
+
+ /// registerIsStackPointer - reports whether a register ID is an alias for the
+ /// stack pointer register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsStackPointer(unsigned registerID);
+ /// registerIsStackPointer - reports whether a register ID is an alias for the
+ /// stack pointer register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsProgramCounter(unsigned registerID);
+
+ /// printInst - prints an MCInst to a string, returning 0 on success, or -1
+ /// otherwise
+ ///
+ /// @arg str - A reference to a string which is filled in with the string
+ /// representation of the instruction
+ /// @arg inst - A reference to the MCInst to be printed
+ int printInst(std::string& str,
+ llvm::MCInst& inst);
+
+ /// parseInst - extracts operands and tokens from a string for use in
+ /// tokenizing the string. Returns 0 on success, or -1 otherwise.
+ ///
+ /// @arg operands - A reference to a vector that will be filled in with the
+ /// parsed operands
+ /// @arg tokens - A reference to a vector that will be filled in with the
+ /// tokens
+ /// @arg str - The string representation of the instruction
+ int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
+ llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
+ const std::string &str);
+
+ /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
+ int llvmSyntaxVariant() const;
+};
+
+} // end namespace llvm
+
+#endif
--- /dev/null
+//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's instruction class.
+// The instruction is responsible for vending the string representation,
+// individual tokens, and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDInst.h"
+#include "EDDisassembler.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+EDInst::EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *info) :
+ Disassembler(disassembler),
+ Inst(inst),
+ ThisInstInfo(info),
+ ByteSize(byteSize),
+ BranchTarget(-1),
+ MoveSource(-1),
+ MoveTarget(-1) {
+ OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()];
+}
+
+EDInst::~EDInst() {
+ unsigned int index;
+ unsigned int numOperands = Operands.size();
+
+ for (index = 0; index < numOperands; ++index)
+ delete Operands[index];
+
+ unsigned int numTokens = Tokens.size();
+
+ for (index = 0; index < numTokens; ++index)
+ delete Tokens[index];
+
+ delete Inst;
+}
+
+uint64_t EDInst::byteSize() {
+ return ByteSize;
+}
+
+int EDInst::stringify() {
+ if (StringifyResult.valid())
+ return StringifyResult.result();
+
+ if (Disassembler.printInst(String, *Inst))
+ return StringifyResult.setResult(-1);
+
+ return StringifyResult.setResult(0);
+}
+
+int EDInst::getString(const char*& str) {
+ if (stringify())
+ return -1;
+
+ str = String.c_str();
+
+ return 0;
+}
+
+unsigned EDInst::instID() {
+ return Inst->getOpcode();
+}
+
+bool EDInst::isBranch() {
+ if (ThisInstInfo)
+ return
+ ThisInstInfo->instructionType == kInstructionTypeBranch ||
+ ThisInstInfo->instructionType == kInstructionTypeCall;
+ else
+ return false;
+}
+
+bool EDInst::isMove() {
+ if (ThisInstInfo)
+ return ThisInstInfo->instructionType == kInstructionTypeMove;
+ else
+ return false;
+}
+
+int EDInst::parseOperands() {
+ if (ParseResult.valid())
+ return ParseResult.result();
+
+ if (!ThisInstInfo)
+ return ParseResult.setResult(-1);
+
+ unsigned int opIndex;
+ unsigned int mcOpIndex = 0;
+
+ for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
+ if (isBranch() &&
+ (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
+ BranchTarget = opIndex;
+ }
+ else if (isMove()) {
+ if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
+ MoveSource = opIndex;
+ else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
+ MoveTarget = opIndex;
+ }
+
+ EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
+
+ Operands.push_back(operand);
+ }
+
+ return ParseResult.setResult(0);
+}
+
+int EDInst::branchTargetID() {
+ if (parseOperands())
+ return -1;
+ return BranchTarget;
+}
+
+int EDInst::moveSourceID() {
+ if (parseOperands())
+ return -1;
+ return MoveSource;
+}
+
+int EDInst::moveTargetID() {
+ if (parseOperands())
+ return -1;
+ return MoveTarget;
+}
+
+int EDInst::numOperands() {
+ if (parseOperands())
+ return -1;
+ return Operands.size();
+}
+
+int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
+ if (parseOperands())
+ return -1;
+
+ if (index >= Operands.size())
+ return -1;
+
+ operand = Operands[index];
+ return 0;
+}
+
+int EDInst::tokenize() {
+ if (TokenizeResult.valid())
+ return TokenizeResult.result();
+
+ if (stringify())
+ return TokenizeResult.setResult(-1);
+
+ return TokenizeResult.setResult(EDToken::tokenize(Tokens,
+ String,
+ OperandOrder,
+ Disassembler));
+
+}
+
+int EDInst::numTokens() {
+ if (tokenize())
+ return -1;
+ return Tokens.size();
+}
+
+int EDInst::getToken(EDToken *&token, unsigned int index) {
+ if (tokenize())
+ return -1;
+ token = Tokens[index];
+ return 0;
+}
+
+#ifdef __BLOCKS__
+int EDInst::visitTokens(EDTokenVisitor_t visitor) {
+ if (tokenize())
+ return -1;
+
+ tokvec_t::iterator iter;
+
+ for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) {
+ int ret = visitor(*iter);
+ if (ret == 1)
+ return 0;
+ if (ret != 0)
+ return -1;
+ }
+
+ return 0;
+}
+#endif
--- /dev/null
+//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// instruction class. The instruction is responsible for vending the string
+// representation, individual tokens and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINST_H
+#define LLVM_EDINST_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+ class MCInst;
+ struct EDInstInfo;
+ struct EDToken;
+ struct EDDisassembler;
+ struct EDOperand;
+
+#ifdef __BLOCKS__
+ typedef int (^EDTokenVisitor_t)(EDToken *token);
+#endif
+
+/// CachedResult - Encapsulates the result of a function along with the validity
+/// of that result, so that slow functions don't need to run twice
+struct CachedResult {
+ /// True if the result has been obtained by executing the function
+ bool Valid;
+ /// The result last obtained from the function
+ int Result;
+
+ /// Constructor - Initializes an invalid result
+ CachedResult() : Valid(false) { }
+ /// valid - Returns true if the result has been obtained by executing the
+ /// function and false otherwise
+ bool valid() { return Valid; }
+ /// result - Returns the result of the function or an undefined value if
+ /// valid() is false
+ int result() { return Result; }
+ /// setResult - Sets the result of the function and declares it valid
+ /// returning the result (so that setResult() can be called from inside a
+ /// return statement)
+ /// @arg result - The result of the function
+ int setResult(int result) { Result = result; Valid = true; return result; }
+};
+
+/// EDInst - Encapsulates a single instruction, which can be queried for its
+/// string representation, as well as its operands and tokens
+struct EDInst {
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+ /// The containing MCInst
+ llvm::MCInst *Inst;
+ /// The instruction information provided by TableGen for this instruction
+ const llvm::EDInstInfo *ThisInstInfo;
+ /// The number of bytes for the machine code representation of the instruction
+ uint64_t ByteSize;
+
+ /// The result of the stringify() function
+ CachedResult StringifyResult;
+ /// The string representation of the instruction
+ std::string String;
+ /// The order in which operands from the InstInfo's operand information appear
+ /// in String
+ const char* OperandOrder;
+
+ /// The result of the parseOperands() function
+ CachedResult ParseResult;
+ typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
+ /// The instruction's operands
+ opvec_t Operands;
+ /// The operand corresponding to the target, if the instruction is a branch
+ int BranchTarget;
+ /// The operand corresponding to the source, if the instruction is a move
+ int MoveSource;
+ /// The operand corresponding to the target, if the instruction is a move
+ int MoveTarget;
+
+ /// The result of the tokenize() function
+ CachedResult TokenizeResult;
+ typedef std::vector<EDToken*> tokvec_t;
+ /// The instruction's tokens
+ tokvec_t Tokens;
+
+ /// Constructor - initializes an instruction given the output of the LLVM
+ /// C++ disassembler
+ ///
+ /// @arg inst - The MCInst, which will now be owned by this object
+ /// @arg byteSize - The size of the consumed instruction, in bytes
+ /// @arg disassembler - The parent disassembler
+ /// @arg instInfo - The instruction information produced by the table
+ /// generator for this instruction
+ EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *instInfo);
+ ~EDInst();
+
+ /// byteSize - returns the number of bytes consumed by the machine code
+ /// representation of the instruction
+ uint64_t byteSize();
+ /// instID - returns the LLVM instruction ID of the instruction
+ unsigned instID();
+
+ /// stringify - populates the String and AsmString members of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int stringify();
+ /// getString - retrieves a pointer to the string representation of the
+ /// instructinon, returning 0 on success or -1 otherwise
+ ///
+ /// @arg str - A reference to a pointer that, on success, is set to point to
+ /// the string representation of the instruction; this string is still owned
+ /// by the instruction and will be deleted when it is
+ int getString(const char *&str);
+
+ /// isBranch - Returns true if the instruction is a branch
+ bool isBranch();
+ /// isMove - Returns true if the instruction is a move
+ bool isMove();
+
+ /// parseOperands - populates the Operands member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int parseOperands();
+ /// branchTargetID - returns the ID (suitable for use with getOperand()) of
+ /// the target operand if the instruction is a branch, or -1 otherwise
+ int branchTargetID();
+ /// moveSourceID - returns the ID of the source operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveSourceID();
+ /// moveTargetID - returns the ID of the target operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveTargetID();
+
+ /// numOperands - returns the number of operands available to retrieve, or -1
+ /// on error
+ int numOperands();
+ /// getOperand - retrieves an operand from the instruction's operand list by
+ /// index, returning 0 on success or -1 on error
+ ///
+ /// @arg operand - A reference whose target is pointed at the operand on
+ /// success, although the operand is still owned by the EDInst
+ /// @arg index - The index of the operand in the instruction
+ int getOperand(EDOperand *&operand, unsigned int index);
+
+ /// tokenize - populates the Tokens member of the instruction, returning 0 on
+ /// success or -1 otherwise
+ int tokenize();
+ /// numTokens - returns the number of tokens in the instruction, or -1 on
+ /// error
+ int numTokens();
+ /// getToken - retrieves a token from the instruction's token list by index,
+ /// returning 0 on success or -1 on error
+ ///
+ /// @arg token - A reference whose target is pointed at the token on success,
+ /// although the token is still owned by the EDInst
+ /// @arg index - The index of the token in the instrcutino
+ int getToken(EDToken *&token, unsigned int index);
+
+#ifdef __BLOCKS__
+ /// visitTokens - Visits each token in turn and applies a block to it,
+ /// returning 0 if all blocks are visited and/or the block signals
+ /// termination by returning 1; returns -1 on error
+ ///
+ /// @arg visitor - The visitor block to apply to all tokens.
+ int visitTokens(EDTokenVisitor_t visitor);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
--- /dev/null
+//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's operand class. The
+// operand is responsible for allowing evaluation given a particular register
+// context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDOperand.h"
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+EDOperand::EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex) :
+ Disassembler(disassembler),
+ Inst(inst),
+ OpIndex(opIndex),
+ MCOpIndex(mcOpIndex) {
+ unsigned int numMCOperands = 0;
+
+ if (Disassembler.Key.Arch == Triple::x86 ||
+ Disassembler.Key.Arch == Triple::x86_64) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ break;
+ case kOperandTypeImmediate:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeRegister:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeX86Memory:
+ numMCOperands = 5;
+ break;
+ case kOperandTypeX86EffectiveAddress:
+ numMCOperands = 4;
+ break;
+ case kOperandTypeX86PCRelative:
+ numMCOperands = 1;
+ break;
+ }
+ }
+ else if (Disassembler.Key.Arch == Triple::arm ||
+ Disassembler.Key.Arch == Triple::thumb) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ case kOperandTypeARMRegisterList:
+ break;
+ case kOperandTypeImmediate:
+ case kOperandTypeRegister:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeARMSoImm2Part:
+ case kOperandTypeARMPredicate:
+ case kOperandTypeThumbITMask:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeARMTBAddrMode:
+ case kOperandTypeThumb2AddrModeImm8s4Offset:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeThumb2SoReg:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ numMCOperands = 2;
+ break;
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumbAddrModeS1:
+ case kOperandTypeThumbAddrModeS2:
+ case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeARMAddrMode6Offset:
+ numMCOperands = 3;
+ break;
+ case kOperandTypeARMAddrMode6:
+ numMCOperands = 4;
+ break;
+ }
+ }
+
+ mcOpIndex += numMCOperands;
+}
+
+EDOperand::~EDOperand() {
+}
+
+int EDOperand::evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg) {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (Disassembler.Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeX86PCRelative:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t ripVal;
+
+ // TODO fix how we do this
+
+ if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
+ return -1;
+
+ result = ripVal + displacement;
+ return 0;
+ }
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86EffectiveAddress:
+ {
+ unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
+ unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
+ //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+
+ uint64_t addr = 0;
+
+ if (baseReg) {
+ uint64_t baseVal;
+ if (callback(&baseVal, baseReg, arg))
+ return -1;
+ addr += baseVal;
+ }
+
+ if (indexReg) {
+ uint64_t indexVal;
+ if (callback(&indexVal, indexReg, arg))
+ return -1;
+ addr += (scaleAmount * indexVal);
+ }
+
+ addr += displacement;
+
+ result = addr;
+ return 0;
+ }
+ }
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeARMBranchTarget:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t pcVal;
+
+ if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
+ return -1;
+
+ result = pcVal + displacement;
+ return 0;
+ }
+ }
+ }
+
+ return -1;
+}
+
+int EDOperand::isRegister() {
+ return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeRegister);
+}
+
+unsigned EDOperand::regVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getReg();
+}
+
+int EDOperand::isImmediate() {
+ return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeImmediate);
+}
+
+uint64_t EDOperand::immediateVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getImm();
+}
+
+int EDOperand::isMemory() {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (operandType) {
+ default:
+ return 0;
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86PCRelative:
+ case kOperandTypeX86EffectiveAddress:
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrMode6:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeThumbAddrModeS1:
+ case kOperandTypeThumbAddrModeS2:
+ case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ return 1;
+ }
+}
+
+#ifdef __BLOCKS__
+struct RegisterReaderWrapper {
+ EDOperand::EDRegisterBlock_t regBlock;
+};
+
+int readerWrapperCallback(uint64_t *value,
+ unsigned regID,
+ void *arg) {
+ struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg;
+ return wrapper->regBlock(value, regID);
+}
+
+int EDOperand::evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock) {
+ struct RegisterReaderWrapper wrapper;
+ wrapper.regBlock = regBlock;
+ return evaluate(result,
+ readerWrapperCallback,
+ (void*)&wrapper);
+}
+#endif
--- /dev/null
+//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// operand class. The operand is responsible for allowing evaluation given a
+// particular register context.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDOPERAND_H
+#define LLVM_EDOPERAND_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+struct EDDisassembler;
+struct EDInst;
+
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
+ void* arg);
+
+
+/// EDOperand - Encapsulates a single operand, which can be evaluated by the
+/// client
+struct EDOperand {
+ /// The parent disassembler
+ const EDDisassembler &Disassembler;
+ /// The parent instruction
+ const EDInst &Inst;
+
+ /// The index of the operand in the EDInst
+ unsigned int OpIndex;
+ /// The index of the first component of the operand in the MCInst
+ unsigned int MCOpIndex;
+
+ /// Constructor - Initializes an EDOperand
+ ///
+ /// @arg disassembler - The disassembler responsible for the operand
+ /// @arg inst - The instruction containing this operand
+ /// @arg opIndex - The index of the operand in inst
+ /// @arg mcOpIndex - The index of the operand in the original MCInst
+ EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex);
+ ~EDOperand();
+
+ /// evaluate - Returns the numeric value of an operand to the extent possible,
+ /// returning 0 on success or -1 if there was some problem (such as a
+ /// register not being readable)
+ ///
+ /// @arg result - A reference whose target is filled in with the value of
+ /// the operand (the address if it is a memory operand)
+ /// @arg callback - A function to call to obtain register values
+ /// @arg arg - An opaque argument to pass to callback
+ int evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg);
+
+ /// isRegister - Returns 1 if the operand is a register or 0 otherwise
+ int isRegister();
+ /// regVal - Returns the register value.
+ unsigned regVal();
+
+ /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise
+ int isImmediate();
+ /// immediateVal - Returns the immediate value.
+ uint64_t immediateVal();
+
+ /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise
+ int isMemory();
+
+#ifdef __BLOCKS__
+ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
+
+ /// evaluate - Like evaluate for a callback, but uses a block instead
+ int evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
--- /dev/null
+//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembler library's token class. The
+// token is responsible for vending information about the token, such as its
+// type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDToken.h"
+#include "EDDisassembler.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+EDToken::EDToken(StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler) :
+ Disassembler(disassembler),
+ Str(str),
+ Type(type),
+ LocalType(localType),
+ OperandID(-1) {
+}
+
+EDToken::~EDToken() {
+}
+
+void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
+ Type = kTokenLiteral;
+ LiteralSign = sign;
+ LiteralAbsoluteValue = absoluteValue;
+}
+
+void EDToken::makeRegister(unsigned registerID) {
+ Type = kTokenRegister;
+ RegisterID = registerID;
+}
+
+void EDToken::setOperandID(int operandID) {
+ OperandID = operandID;
+}
+
+enum EDToken::tokenType EDToken::type() const {
+ return Type;
+}
+
+uint64_t EDToken::localType() const {
+ return LocalType;
+}
+
+StringRef EDToken::string() const {
+ return Str;
+}
+
+int EDToken::operandID() const {
+ return OperandID;
+}
+
+int EDToken::literalSign() const {
+ if (Type != kTokenLiteral)
+ return -1;
+ return (LiteralSign ? 1 : 0);
+}
+
+int EDToken::literalAbsoluteValue(uint64_t &value) const {
+ if (Type != kTokenLiteral)
+ return -1;
+ value = LiteralAbsoluteValue;
+ return 0;
+}
+
+int EDToken::registerID(unsigned ®isterID) const {
+ if (Type != kTokenRegister)
+ return -1;
+ registerID = RegisterID;
+ return 0;
+}
+
+int EDToken::tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler) {
+ SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
+ SmallVector<AsmToken, 10> asmTokens;
+
+ if (disassembler.parseInst(parsedOperands, asmTokens, str))
+ return -1;
+
+ SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
+ unsigned int operandIndex;
+ SmallVectorImpl<AsmToken>::iterator tokenIterator;
+
+ operandIterator = parsedOperands.begin();
+ operandIndex = 0;
+
+ bool readOpcode = false;
+
+ const char *wsPointer = asmTokens.begin()->getLoc().getPointer();
+
+ for (tokenIterator = asmTokens.begin();
+ tokenIterator != asmTokens.end();
+ ++tokenIterator) {
+ SMLoc tokenLoc = tokenIterator->getLoc();
+
+ const char *tokenPointer = tokenLoc.getPointer();
+
+ if (tokenPointer > wsPointer) {
+ unsigned long wsLength = tokenPointer - wsPointer;
+
+ EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
+ EDToken::kTokenWhitespace,
+ 0,
+ disassembler);
+
+ tokens.push_back(whitespaceToken);
+ }
+
+ wsPointer = tokenPointer + tokenIterator->getString().size();
+
+ while (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >
+ (*operandIterator)->getEndLoc().getPointer()) {
+ ++operandIterator;
+ ++operandIndex;
+ }
+
+ EDToken *token;
+
+ switch (tokenIterator->getKind()) {
+ case AsmToken::Identifier:
+ if (!readOpcode) {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenOpcode,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ readOpcode = true;
+ break;
+ }
+ // any identifier that isn't an opcode is mere punctuation; so we fall
+ // through
+ default:
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenPunctuation,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ break;
+ case AsmToken::Integer:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ int64_t intVal = tokenIterator->getIntVal();
+
+ if (intVal < 0)
+ token->makeLiteral(true, -intVal);
+ else
+ token->makeLiteral(false, intVal);
+ break;
+ }
+ case AsmToken::Register:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ token->makeRegister((unsigned)tokenIterator->getRegVal());
+ break;
+ }
+ }
+
+ if (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >=
+ (*operandIterator)->getStartLoc().getPointer()) {
+ /// operandIndex == 0 means the operand is the instruction (which the
+ /// AsmParser treats as an operand but edis does not). We therefore skip
+ /// operandIndex == 0 and subtract 1 from all other operand indices.
+
+ if (operandIndex > 0)
+ token->setOperandID(operandOrder[operandIndex - 1]);
+ }
+
+ tokens.push_back(token);
+ }
+
+ return 0;
+}
+
+int EDToken::getString(const char*& buf) {
+ if (PermStr.length() == 0) {
+ PermStr = Str.str();
+ }
+ buf = PermStr.c_str();
+ return 0;
+}
--- /dev/null
+//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's token
+// class. The token is responsible for vending information about the token,
+// such as its type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDTOKEN_H
+#define LLVM_EDTOKEN_H
+
+#include "llvm/ADT/StringRef.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+struct EDDisassembler;
+
+/// EDToken - Encapsulates a single token, which can provide a string
+/// representation of itself or interpret itself in various ways, depending
+/// on the token type.
+struct EDToken {
+ enum tokenType {
+ kTokenWhitespace,
+ kTokenOpcode,
+ kTokenLiteral,
+ kTokenRegister,
+ kTokenPunctuation
+ };
+
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+
+ /// The token's string representation
+ llvm::StringRef Str;
+ /// The token's string representation, but in a form suitable for export
+ std::string PermStr;
+ /// The type of the token, as exposed through the external API
+ enum tokenType Type;
+ /// The type of the token, as recorded by the syntax-specific tokenizer
+ uint64_t LocalType;
+ /// The operand corresponding to the token, or (unsigned int)-1 if not
+ /// part of an operand.
+ int OperandID;
+
+ /// The sign if the token is a literal (1 if negative, 0 otherwise)
+ bool LiteralSign;
+ /// The absolute value if the token is a literal
+ uint64_t LiteralAbsoluteValue;
+ /// The LLVM register ID if the token is a register name
+ unsigned RegisterID;
+
+ /// Constructor - Initializes an EDToken with the information common to all
+ /// tokens
+ ///
+ /// @arg str - The string corresponding to the token
+ /// @arg type - The token's type as exposed through the public API
+ /// @arg localType - The token's type as recorded by the tokenizer
+ /// @arg disassembler - The disassembler responsible for the token
+ EDToken(llvm::StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler);
+
+ /// makeLiteral - Adds the information specific to a literal
+ /// @arg sign - The sign of the literal (1 if negative, 0
+ /// otherwise)
+ ///
+ /// @arg absoluteValue - The absolute value of the literal
+ void makeLiteral(bool sign, uint64_t absoluteValue);
+ /// makeRegister - Adds the information specific to a register
+ ///
+ /// @arg registerID - The LLVM register ID
+ void makeRegister(unsigned registerID);
+
+ /// setOperandID - Links the token to a numbered operand
+ ///
+ /// @arg operandID - The operand ID to link to
+ void setOperandID(int operandID);
+
+ ~EDToken();
+
+ /// type - Returns the public type of the token
+ enum tokenType type() const;
+ /// localType - Returns the tokenizer-specific type of the token
+ uint64_t localType() const;
+ /// string - Returns the string representation of the token
+ llvm::StringRef string() const;
+ /// operandID - Returns the operand ID of the token
+ int operandID() const;
+
+ /// literalSign - Returns the sign of the token
+ /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
+ int literalSign() const;
+ /// literalAbsoluteValue - Retrieves the absolute value of the token, and
+ /// returns -1 if the token is not a literal
+ /// @arg value - A reference to a value that is filled in with the absolute
+ /// value, if it is valid
+ int literalAbsoluteValue(uint64_t &value) const;
+ /// registerID - Retrieves the register ID of the token, and returns -1 if the
+ /// token is not a register
+ ///
+ /// @arg registerID - A reference to a value that is filled in with the
+ /// register ID, if it is valid
+ int registerID(unsigned ®isterID) const;
+
+ /// tokenize - Tokenizes a string using the platform- and syntax-specific
+ /// tokenizer, and returns 0 on success (-1 on failure)
+ ///
+ /// @arg tokens - A vector that will be filled in with pointers to
+ /// allocated tokens
+ /// @arg str - The string, as outputted by the AsmPrinter
+ /// @arg operandOrder - The order of the operands from the operandFlags array
+ /// as they appear in str
+ /// @arg disassembler - The disassembler for the desired target and
+ // assembly syntax
+ static int tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler);
+
+ /// getString - Directs a character pointer to the string, returning 0 on
+ /// success (-1 on failure)
+ /// @arg buf - A reference to a pointer that is set to point to the string.
+ /// The string is still owned by the token.
+ int getString(const char*& buf);
+};
+
+} // end namespace llvm
+#endif
LEVEL = ../..
LIBRARYNAME = LLVMMC
BUILD_ARCHIVE := 1
-PARALLEL_DIRS := MCParser
+PARALLEL_DIRS := MCParser MCDisassembler
include $(LEVEL)/Makefile.common
# NOTE: The tools are organized into five groups of four consisting of one
# large and three small executables. This is done to minimize memory load
# in parallel builds. Please retain this ordering.
-
-# libEnhancedDisassembly must be built ahead of llvm-mc
-# because llvm-mc links against libEnhancedDisassembly
-DIRS := llvm-config edis llvm-mc
-PARALLEL_DIRS := opt llvm-as llvm-dis \
+DIRS := llvm-config
+PARALLEL_DIRS := opt llvm-as llvm-dis edis \
llc llvm-ranlib llvm-ar llvm-nm \
llvm-ld llvm-prof llvm-link \
- lli llvm-extract \
+ lli llvm-extract llvm-mc \
bugpoint llvm-bcanalyzer llvm-stub \
llvmc
include $(LEVEL)/Makefile.config
+
# These libraries build as dynamic libraries (.dylib /.so), they can only be
# built if ENABLE_PIC is set.
ifeq ($(ENABLE_PIC),1)
else
PARALLEL_DIRS += lto
endif
+
+ # The edis library is only supported if ARM and/or X86 are enabled, and if
+ # LLVM is being built PIC on platforms that support dylibs.
+ ifneq ($(DISABLE_EDIS),1)
+ ifneq ($(filter $(TARGETS_TO_BUILD), X86 ARM),)
+ PARALLEL_DIRS := $(filter-out edis, $(PARALLEL_DIRS))
+ endif
+ endif
endif
endif
+++ /dev/null
-//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembly library's disassembler class.
-// The disassembler is responsible for vending individual instructions according
-// to a given architecture and disassembly syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDDisassembler.h"
-#include "EDInst.h"
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetAsmParser.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSelect.h"
-
-using namespace llvm;
-
-bool EDDisassembler::sInitialized = false;
-EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
-
-struct TripleMap {
- Triple::ArchType Arch;
- const char *String;
-};
-
-static struct TripleMap triplemap[] = {
- { Triple::x86, "i386-unknown-unknown" },
- { Triple::x86_64, "x86_64-unknown-unknown" },
- { Triple::arm, "arm-unknown-unknown" },
- { Triple::thumb, "thumb-unknown-unknown" },
- { Triple::InvalidArch, NULL, }
-};
-
-/// infoFromArch - Returns the TripleMap corresponding to a given architecture,
-/// or NULL if there is an error
-///
-/// @arg arch - The Triple::ArchType for the desired architecture
-static const char *tripleFromArch(Triple::ArchType arch) {
- unsigned int infoIndex;
-
- for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
- if (arch == triplemap[infoIndex].Arch)
- return triplemap[infoIndex].String;
- }
-
- return NULL;
-}
-
-/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
-/// for the desired assembly syntax, suitable for passing to
-/// Target::createMCInstPrinter()
-///
-/// @arg arch - The target architecture
-/// @arg syntax - The assembly syntax in sd form
-static int getLLVMSyntaxVariant(Triple::ArchType arch,
- EDAssemblySyntax_t syntax) {
- switch (syntax) {
- default:
- return -1;
- // Mappings below from X86AsmPrinter.cpp
- case kEDAssemblySyntaxX86ATT:
- if (arch == Triple::x86 || arch == Triple::x86_64)
- return 0;
- else
- return -1;
- case kEDAssemblySyntaxX86Intel:
- if (arch == Triple::x86 || arch == Triple::x86_64)
- return 1;
- else
- return -1;
- case kEDAssemblySyntaxARMUAL:
- if (arch == Triple::arm || arch == Triple::thumb)
- return 0;
- else
- return -1;
- }
-}
-
-void EDDisassembler::initialize() {
- if (sInitialized)
- return;
-
- sInitialized = true;
-
- InitializeAllTargetInfos();
- InitializeAllTargets();
- InitializeAllAsmPrinters();
- InitializeAllAsmParsers();
- InitializeAllDisassemblers();
-}
-
-#undef BRINGUP_TARGET
-
-EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
- EDAssemblySyntax_t syntax) {
- CPUKey key;
- key.Arch = arch;
- key.Syntax = syntax;
-
- EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
-
- if (i != sDisassemblers.end()) {
- return i->second;
- } else {
- EDDisassembler* sdd = new EDDisassembler(key);
- if (!sdd->valid()) {
- delete sdd;
- return NULL;
- }
-
- sDisassemblers[key] = sdd;
-
- return sdd;
- }
-
- return NULL;
-}
-
-EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
- EDAssemblySyntax_t syntax) {
- Triple triple(str);
-
- return getDisassembler(triple.getArch(), syntax);
-}
-
-EDDisassembler::EDDisassembler(CPUKey &key) :
- Valid(false),
- HasSemantics(false),
- ErrorStream(nulls()),
- Key(key) {
- const char *triple = tripleFromArch(key.Arch);
-
- if (!triple)
- return;
-
- LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
-
- if (LLVMSyntaxVariant < 0)
- return;
-
- std::string tripleString(triple);
- std::string errorString;
-
- Tgt = TargetRegistry::lookupTarget(tripleString,
- errorString);
-
- if (!Tgt)
- return;
-
- std::string featureString;
-
- TargetMachine.reset(Tgt->createTargetMachine(tripleString,
- featureString));
-
- const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
-
- if (!registerInfo)
- return;
-
- initMaps(*registerInfo);
-
- AsmInfo.reset(Tgt->createAsmInfo(tripleString));
-
- if (!AsmInfo)
- return;
-
- Disassembler.reset(Tgt->createMCDisassembler());
-
- if (!Disassembler)
- return;
-
- InstInfos = Disassembler->getEDInfo();
-
- InstString.reset(new std::string);
- InstStream.reset(new raw_string_ostream(*InstString));
- InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
-
- if (!InstPrinter)
- return;
-
- GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
- SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
- SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
-
- initMaps(*TargetMachine->getRegisterInfo());
-
- Valid = true;
-}
-
-EDDisassembler::~EDDisassembler() {
- if (!valid())
- return;
-}
-
-namespace {
- /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
- /// as provided by the sd interface. See MemoryObject.
- class EDMemoryObject : public llvm::MemoryObject {
- private:
- EDByteReaderCallback Callback;
- void *Arg;
- public:
- EDMemoryObject(EDByteReaderCallback callback,
- void *arg) : Callback(callback), Arg(arg) { }
- ~EDMemoryObject() { }
- uint64_t getBase() const { return 0x0; }
- uint64_t getExtent() const { return (uint64_t)-1; }
- int readByte(uint64_t address, uint8_t *ptr) const {
- if (!Callback)
- return -1;
-
- if (Callback(ptr, address, Arg))
- return -1;
-
- return 0;
- }
- };
-}
-
-EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
- uint64_t address,
- void *arg) {
- EDMemoryObject memoryObject(byteReader, arg);
-
- MCInst* inst = new MCInst;
- uint64_t byteSize;
-
- if (!Disassembler->getInstruction(*inst,
- byteSize,
- memoryObject,
- address,
- ErrorStream)) {
- delete inst;
- return NULL;
- } else {
- const llvm::EDInstInfo *thisInstInfo;
-
- thisInstInfo = &InstInfos[inst->getOpcode()];
-
- EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
- return sdInst;
- }
-}
-
-void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) {
- unsigned numRegisters = registerInfo.getNumRegs();
- unsigned registerIndex;
-
- for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
- const char* registerName = registerInfo.get(registerIndex).Name;
-
- RegVec.push_back(registerName);
- RegRMap[registerName] = registerIndex;
- }
-
- switch (Key.Arch) {
- default:
- break;
- case Triple::x86:
- case Triple::x86_64:
- stackPointers.insert(registerIDWithName("SP"));
- stackPointers.insert(registerIDWithName("ESP"));
- stackPointers.insert(registerIDWithName("RSP"));
-
- programCounters.insert(registerIDWithName("IP"));
- programCounters.insert(registerIDWithName("EIP"));
- programCounters.insert(registerIDWithName("RIP"));
- break;
- case Triple::arm:
- case Triple::thumb:
- stackPointers.insert(registerIDWithName("SP"));
-
- programCounters.insert(registerIDWithName("PC"));
- break;
- }
-}
-
-const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
- if (registerID >= RegVec.size())
- return NULL;
- else
- return RegVec[registerID].c_str();
-}
-
-unsigned EDDisassembler::registerIDWithName(const char *name) const {
- regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
- if (iter == RegRMap.end())
- return 0;
- else
- return (*iter).second;
-}
-
-bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
- return (stackPointers.find(registerID) != stackPointers.end());
-}
-
-bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
- return (programCounters.find(registerID) != programCounters.end());
-}
-
-int EDDisassembler::printInst(std::string &str, MCInst &inst) {
- PrinterMutex.acquire();
-
- InstPrinter->printInst(&inst, *InstStream);
- InstStream->flush();
- str = *InstString;
- InstString->clear();
-
- PrinterMutex.release();
-
- return 0;
-}
-
-int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
- SmallVectorImpl<AsmToken> &tokens,
- const std::string &str) {
- int ret = 0;
-
- switch (Key.Arch) {
- default:
- return -1;
- case Triple::x86:
- case Triple::x86_64:
- case Triple::arm:
- case Triple::thumb:
- break;
- }
-
- const char *cStr = str.c_str();
- MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
-
- StringRef instName;
- SMLoc instLoc;
-
- SourceMgr sourceMgr;
- sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
- MCContext context(*AsmInfo);
- OwningPtr<MCStreamer> streamer(createNullStreamer(context));
- OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
- context, *streamer,
- *AsmInfo));
- OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser,
- *TargetMachine));
-
- AsmToken OpcodeToken = genericParser->Lex();
- AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
-
- if (OpcodeToken.is(AsmToken::Identifier)) {
- instName = OpcodeToken.getString();
- instLoc = OpcodeToken.getLoc();
-
- if (NextToken.isNot(AsmToken::Eof) &&
- TargetParser->ParseInstruction(instName, instLoc, operands))
- ret = -1;
- } else {
- ret = -1;
- }
-
- ParserMutex.acquire();
-
- if (!ret) {
- GenericAsmLexer->setBuffer(buf);
-
- while (SpecificAsmLexer->Lex(),
- SpecificAsmLexer->isNot(AsmToken::Eof) &&
- SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
- if (SpecificAsmLexer->is(AsmToken::Error)) {
- ret = -1;
- break;
- }
- tokens.push_back(SpecificAsmLexer->getTok());
- }
- }
-
- ParserMutex.release();
-
- return ret;
-}
-
-int EDDisassembler::llvmSyntaxVariant() const {
- return LLVMSyntaxVariant;
-}
+++ /dev/null
-//===-EDDisassembler.h - LLVM Enhanced Disassembler -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's
-// disassembler class. The disassembler is responsible for vending individual
-// instructions according to a given architecture and disassembly syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef EDDisassembler_
-#define EDDisassembler_
-
-#include "EDInfo.inc"
-
-#include "llvm-c/EnhancedDisassembly.h"
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
-
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace llvm {
-class AsmLexer;
-class AsmToken;
-class MCContext;
-class MCAsmInfo;
-class MCAsmLexer;
-class AsmParser;
-class TargetAsmLexer;
-class TargetAsmParser;
-class MCDisassembler;
-class MCInstPrinter;
-class MCInst;
-class MCParsedAsmOperand;
-class MCStreamer;
-template <typename T> class SmallVectorImpl;
-class SourceMgr;
-class Target;
-class TargetMachine;
-class TargetRegisterInfo;
-
-struct EDInstInfo;
-}
-
-/// EDDisassembler - Encapsulates a disassembler for a single architecture and
-/// disassembly syntax. Also manages the static disassembler registry.
-struct EDDisassembler {
- ////////////////////
- // Static members //
- ////////////////////
-
- /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
- /// pair
- struct CPUKey {
- /// The architecture type
- llvm::Triple::ArchType Arch;
-
- /// The assembly syntax
- EDAssemblySyntax_t Syntax;
-
- /// operator== - Equality operator
- bool operator==(const CPUKey &key) const {
- return (Arch == key.Arch &&
- Syntax == key.Syntax);
- }
-
- /// operator< - Less-than operator
- bool operator<(const CPUKey &key) const {
- if(Arch > key.Arch)
- return false;
- if(Syntax >= key.Syntax)
- return false;
- return true;
- }
- };
-
- typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
-
- /// True if the disassembler registry has been initialized; false if not
- static bool sInitialized;
- /// A map from disassembler specifications to disassemblers. Populated
- /// lazily.
- static DisassemblerMap_t sDisassemblers;
-
- /// getDisassembler - Returns the specified disassemble, or NULL on failure
- ///
- /// @arg arch - The desired architecture
- /// @arg syntax - The desired disassembly syntax
- static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
- EDAssemblySyntax_t syntax);
-
- /// getDisassembler - Returns the disassembler for a given combination of
- /// CPU type, CPU subtype, and assembly syntax, or NULL on failure
- ///
- /// @arg str - The string representation of the architecture triple, e.g.,
- /// "x86_64-apple-darwin"
- /// @arg syntax - The disassembly syntax for the required disassembler
- static EDDisassembler *getDisassembler(llvm::StringRef str,
- EDAssemblySyntax_t syntax);
-
- /// initialize - Initializes the disassembler registry and the LLVM backend
- static void initialize();
-
- ////////////////////////
- // Per-object members //
- ////////////////////////
-
- /// True only if the object has been successfully initialized
- bool Valid;
- /// True if the disassembler can provide semantic information
- bool HasSemantics;
-
- /// The stream to write errors to
- llvm::raw_ostream &ErrorStream;
-
- /// The architecture/syntax pair for the current architecture
- CPUKey Key;
- /// The LLVM target corresponding to the disassembler
- const llvm::Target *Tgt;
- /// The target machien instance.
- llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
- /// The assembly information for the target architecture
- llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
- /// The disassembler for the target architecture
- llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
- /// The output string for the instruction printer; must be guarded with
- /// PrinterMutex
- llvm::OwningPtr<std::string> InstString;
- /// The output stream for the disassembler; must be guarded with
- /// PrinterMutex
- llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
- /// The instruction printer for the target architecture; must be guarded with
- /// PrinterMutex when printing
- llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
- /// The mutex that guards the instruction printer's printing functions, which
- /// use a shared stream
- llvm::sys::Mutex PrinterMutex;
- /// The array of instruction information provided by the TableGen backend for
- /// the target architecture
- const llvm::EDInstInfo *InstInfos;
- /// The target-specific lexer for use in tokenizing strings, in
- /// target-independent and target-specific portions
- llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
- llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
- /// The guard for the above
- llvm::sys::Mutex ParserMutex;
- /// The LLVM number used for the target disassembly syntax variant
- int LLVMSyntaxVariant;
-
- typedef std::vector<std::string> regvec_t;
- typedef std::map<std::string, unsigned> regrmap_t;
-
- /// A vector of registers for quick mapping from LLVM register IDs to names
- regvec_t RegVec;
- /// A map of registers for quick mapping from register names to LLVM IDs
- regrmap_t RegRMap;
-
- /// A set of register IDs for aliases of the stack pointer for the current
- /// architecture
- std::set<unsigned> stackPointers;
- /// A set of register IDs for aliases of the program counter for the current
- /// architecture
- std::set<unsigned> programCounters;
-
- /// Constructor - initializes a disassembler with all the necessary objects,
- /// which come pre-allocated from the registry accessor function
- ///
- /// @arg key - the architecture and disassembly syntax for the
- /// disassembler
- EDDisassembler(CPUKey& key);
-
- /// valid - reports whether there was a failure in the constructor.
- bool valid() {
- return Valid;
- }
-
- /// hasSemantics - reports whether the disassembler can provide operands and
- /// tokens.
- bool hasSemantics() {
- return HasSemantics;
- }
-
- ~EDDisassembler();
-
- /// createInst - creates and returns an instruction given a callback and
- /// memory address, or NULL on failure
- ///
- /// @arg byteReader - A callback function that provides machine code bytes
- /// @arg address - The address of the first byte of the instruction,
- /// suitable for passing to byteReader
- /// @arg arg - An opaque argument for byteReader
- EDInst *createInst(EDByteReaderCallback byteReader,
- uint64_t address,
- void *arg);
-
- /// initMaps - initializes regVec and regRMap using the provided register
- /// info
- ///
- /// @arg registerInfo - the register information to use as a source
- void initMaps(const llvm::TargetRegisterInfo ®isterInfo);
- /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
- /// register for a given register ID, or NULL on failure
- ///
- /// @arg registerID - the ID of the register to be queried
- const char *nameWithRegisterID(unsigned registerID) const;
- /// registerIDWithName - Returns the ID of a register for a given register
- /// name, or (unsigned)-1 on failure
- ///
- /// @arg name - The name of the register
- unsigned registerIDWithName(const char *name) const;
-
- /// registerIsStackPointer - reports whether a register ID is an alias for the
- /// stack pointer register
- ///
- /// @arg registerID - The LLVM register ID
- bool registerIsStackPointer(unsigned registerID);
- /// registerIsStackPointer - reports whether a register ID is an alias for the
- /// stack pointer register
- ///
- /// @arg registerID - The LLVM register ID
- bool registerIsProgramCounter(unsigned registerID);
-
- /// printInst - prints an MCInst to a string, returning 0 on success, or -1
- /// otherwise
- ///
- /// @arg str - A reference to a string which is filled in with the string
- /// representation of the instruction
- /// @arg inst - A reference to the MCInst to be printed
- int printInst(std::string& str,
- llvm::MCInst& inst);
-
- /// parseInst - extracts operands and tokens from a string for use in
- /// tokenizing the string. Returns 0 on success, or -1 otherwise.
- ///
- /// @arg operands - A reference to a vector that will be filled in with the
- /// parsed operands
- /// @arg tokens - A reference to a vector that will be filled in with the
- /// tokens
- /// @arg str - The string representation of the instruction
- int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
- llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
- const std::string &str);
-
- /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
- int llvmSyntaxVariant() const;
-};
-
-#endif
+++ /dev/null
-//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembly library's instruction class.
-// The instruction is responsible for vending the string representation,
-// individual tokens, and operands for a single instruction.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDDisassembler.h"
-#include "EDInst.h"
-#include "EDOperand.h"
-#include "EDToken.h"
-
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-
-using namespace llvm;
-
-EDInst::EDInst(llvm::MCInst *inst,
- uint64_t byteSize,
- EDDisassembler &disassembler,
- const llvm::EDInstInfo *info) :
- Disassembler(disassembler),
- Inst(inst),
- ThisInstInfo(info),
- ByteSize(byteSize),
- BranchTarget(-1),
- MoveSource(-1),
- MoveTarget(-1) {
- OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()];
-}
-
-EDInst::~EDInst() {
- unsigned int index;
- unsigned int numOperands = Operands.size();
-
- for (index = 0; index < numOperands; ++index)
- delete Operands[index];
-
- unsigned int numTokens = Tokens.size();
-
- for (index = 0; index < numTokens; ++index)
- delete Tokens[index];
-
- delete Inst;
-}
-
-uint64_t EDInst::byteSize() {
- return ByteSize;
-}
-
-int EDInst::stringify() {
- if (StringifyResult.valid())
- return StringifyResult.result();
-
- if (Disassembler.printInst(String, *Inst))
- return StringifyResult.setResult(-1);
-
- return StringifyResult.setResult(0);
-}
-
-int EDInst::getString(const char*& str) {
- if (stringify())
- return -1;
-
- str = String.c_str();
-
- return 0;
-}
-
-unsigned EDInst::instID() {
- return Inst->getOpcode();
-}
-
-bool EDInst::isBranch() {
- if (ThisInstInfo)
- return
- ThisInstInfo->instructionType == kInstructionTypeBranch ||
- ThisInstInfo->instructionType == kInstructionTypeCall;
- else
- return false;
-}
-
-bool EDInst::isMove() {
- if (ThisInstInfo)
- return ThisInstInfo->instructionType == kInstructionTypeMove;
- else
- return false;
-}
-
-int EDInst::parseOperands() {
- if (ParseResult.valid())
- return ParseResult.result();
-
- if (!ThisInstInfo)
- return ParseResult.setResult(-1);
-
- unsigned int opIndex;
- unsigned int mcOpIndex = 0;
-
- for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
- if (isBranch() &&
- (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
- BranchTarget = opIndex;
- }
- else if (isMove()) {
- if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
- MoveSource = opIndex;
- else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
- MoveTarget = opIndex;
- }
-
- EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
-
- Operands.push_back(operand);
- }
-
- return ParseResult.setResult(0);
-}
-
-int EDInst::branchTargetID() {
- if (parseOperands())
- return -1;
- return BranchTarget;
-}
-
-int EDInst::moveSourceID() {
- if (parseOperands())
- return -1;
- return MoveSource;
-}
-
-int EDInst::moveTargetID() {
- if (parseOperands())
- return -1;
- return MoveTarget;
-}
-
-int EDInst::numOperands() {
- if (parseOperands())
- return -1;
- return Operands.size();
-}
-
-int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
- if (parseOperands())
- return -1;
-
- if (index >= Operands.size())
- return -1;
-
- operand = Operands[index];
- return 0;
-}
-
-int EDInst::tokenize() {
- if (TokenizeResult.valid())
- return TokenizeResult.result();
-
- if (stringify())
- return TokenizeResult.setResult(-1);
-
- return TokenizeResult.setResult(EDToken::tokenize(Tokens,
- String,
- OperandOrder,
- Disassembler));
-
-}
-
-int EDInst::numTokens() {
- if (tokenize())
- return -1;
- return Tokens.size();
-}
-
-int EDInst::getToken(EDToken *&token, unsigned int index) {
- if (tokenize())
- return -1;
- token = Tokens[index];
- return 0;
-}
-
-#ifdef __BLOCKS__
-int EDInst::visitTokens(EDTokenVisitor_t visitor) {
- if (tokenize())
- return -1;
-
- tokvec_t::iterator iter;
-
- for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) {
- int ret = visitor(*iter);
- if (ret == 1)
- return 0;
- if (ret != 0)
- return -1;
- }
-
- return 0;
-}
-#endif
+++ /dev/null
-//===-EDInst.h - LLVM Enhanced Disassembler ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's
-// instruction class. The instruction is responsible for vending the string
-// representation, individual tokens and operands for a single instruction.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef EDInst_
-#define EDInst_
-
-#include "llvm-c/EnhancedDisassembly.h"
-
-#include "llvm/ADT/SmallVector.h"
-
-#include <string>
-#include <vector>
-
-namespace llvm {
- struct EDInstInfo;
-}
-
-/// CachedResult - Encapsulates the result of a function along with the validity
-/// of that result, so that slow functions don't need to run twice
-struct CachedResult {
- /// True if the result has been obtained by executing the function
- bool Valid;
- /// The result last obtained from the function
- int Result;
-
- /// Constructor - Initializes an invalid result
- CachedResult() : Valid(false) { }
- /// valid - Returns true if the result has been obtained by executing the
- /// function and false otherwise
- bool valid() { return Valid; }
- /// result - Returns the result of the function or an undefined value if
- /// valid() is false
- int result() { return Result; }
- /// setResult - Sets the result of the function and declares it valid
- /// returning the result (so that setResult() can be called from inside a
- /// return statement)
- /// @arg result - The result of the function
- int setResult(int result) { Result = result; Valid = true; return result; }
-};
-
-/// EDInst - Encapsulates a single instruction, which can be queried for its
-/// string representation, as well as its operands and tokens
-struct EDInst {
- /// The parent disassembler
- EDDisassembler &Disassembler;
- /// The containing MCInst
- llvm::MCInst *Inst;
- /// The instruction information provided by TableGen for this instruction
- const llvm::EDInstInfo *ThisInstInfo;
- /// The number of bytes for the machine code representation of the instruction
- uint64_t ByteSize;
-
- /// The result of the stringify() function
- CachedResult StringifyResult;
- /// The string representation of the instruction
- std::string String;
- /// The order in which operands from the InstInfo's operand information appear
- /// in String
- const char* OperandOrder;
-
- /// The result of the parseOperands() function
- CachedResult ParseResult;
- typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
- /// The instruction's operands
- opvec_t Operands;
- /// The operand corresponding to the target, if the instruction is a branch
- int BranchTarget;
- /// The operand corresponding to the source, if the instruction is a move
- int MoveSource;
- /// The operand corresponding to the target, if the instruction is a move
- int MoveTarget;
-
- /// The result of the tokenize() function
- CachedResult TokenizeResult;
- typedef std::vector<EDToken*> tokvec_t;
- /// The instruction's tokens
- tokvec_t Tokens;
-
- /// Constructor - initializes an instruction given the output of the LLVM
- /// C++ disassembler
- ///
- /// @arg inst - The MCInst, which will now be owned by this object
- /// @arg byteSize - The size of the consumed instruction, in bytes
- /// @arg disassembler - The parent disassembler
- /// @arg instInfo - The instruction information produced by the table
- /// generator for this instruction
- EDInst(llvm::MCInst *inst,
- uint64_t byteSize,
- EDDisassembler &disassembler,
- const llvm::EDInstInfo *instInfo);
- ~EDInst();
-
- /// byteSize - returns the number of bytes consumed by the machine code
- /// representation of the instruction
- uint64_t byteSize();
- /// instID - returns the LLVM instruction ID of the instruction
- unsigned instID();
-
- /// stringify - populates the String and AsmString members of the instruction,
- /// returning 0 on success or -1 otherwise
- int stringify();
- /// getString - retrieves a pointer to the string representation of the
- /// instructinon, returning 0 on success or -1 otherwise
- ///
- /// @arg str - A reference to a pointer that, on success, is set to point to
- /// the string representation of the instruction; this string is still owned
- /// by the instruction and will be deleted when it is
- int getString(const char *&str);
-
- /// isBranch - Returns true if the instruction is a branch
- bool isBranch();
- /// isMove - Returns true if the instruction is a move
- bool isMove();
-
- /// parseOperands - populates the Operands member of the instruction,
- /// returning 0 on success or -1 otherwise
- int parseOperands();
- /// branchTargetID - returns the ID (suitable for use with getOperand()) of
- /// the target operand if the instruction is a branch, or -1 otherwise
- int branchTargetID();
- /// moveSourceID - returns the ID of the source operand if the instruction
- /// is a move, or -1 otherwise
- int moveSourceID();
- /// moveTargetID - returns the ID of the target operand if the instruction
- /// is a move, or -1 otherwise
- int moveTargetID();
-
- /// numOperands - returns the number of operands available to retrieve, or -1
- /// on error
- int numOperands();
- /// getOperand - retrieves an operand from the instruction's operand list by
- /// index, returning 0 on success or -1 on error
- ///
- /// @arg operand - A reference whose target is pointed at the operand on
- /// success, although the operand is still owned by the EDInst
- /// @arg index - The index of the operand in the instruction
- int getOperand(EDOperand *&operand, unsigned int index);
-
- /// tokenize - populates the Tokens member of the instruction, returning 0 on
- /// success or -1 otherwise
- int tokenize();
- /// numTokens - returns the number of tokens in the instruction, or -1 on
- /// error
- int numTokens();
- /// getToken - retrieves a token from the instruction's token list by index,
- /// returning 0 on success or -1 on error
- ///
- /// @arg token - A reference whose target is pointed at the token on success,
- /// although the token is still owned by the EDInst
- /// @arg index - The index of the token in the instrcutino
- int getToken(EDToken *&token, unsigned int index);
-
-#ifdef __BLOCKS__
- /// visitTokens - Visits each token in turn and applies a block to it,
- /// returning 0 if all blocks are visited and/or the block signals
- /// termination by returning 1; returns -1 on error
- ///
- /// @arg visitor - The visitor block to apply to all tokens.
- int visitTokens(EDTokenVisitor_t visitor);
-#endif
-};
-
-#endif
-//===-EDMain.cpp - LLVM Enhanced Disassembly C API ------------------------===//
+//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===//
//
// The LLVM Compiler Infrastructure
//
//
//===----------------------------------------------------------------------===//
-#include "EDDisassembler.h"
-#include "EDInst.h"
-#include "EDOperand.h"
-#include "EDToken.h"
-
+// FIXME: This code isn't layered right, the headers should be moved to
+// include llvm/MC/MCDisassembler or something.
+#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
+#include "../../lib/MC/MCDisassembler/EDInst.h"
+#include "../../lib/MC/MCDisassembler/EDOperand.h"
+#include "../../lib/MC/MCDisassembler/EDToken.h"
#include "llvm-c/EnhancedDisassembly.h"
+using namespace llvm;
int EDGetDisassembler(EDDisassemblerRef *disassembler,
const char *triple,
EDAssemblySyntax_t syntax) {
EDDisassembler::initialize();
- EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple,
- syntax);
+ EDDisassembler::AssemblySyntax Syntax;
+ switch (syntax) {
+ default: assert(0 && "Unknown assembly syntax!");
+ case kEDAssemblySyntaxX86Intel:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel;
+ break;
+ case kEDAssemblySyntaxX86ATT:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT;
+ break;
+ case kEDAssemblySyntaxARMUAL:
+ Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL;
+ break;
+ }
+
+ EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax);
- if (ret) {
- *disassembler = ret;
- return 0;
- } else {
+ if (!ret)
return -1;
- }
+ *disassembler = ret;
+ return 0;
}
int EDGetRegisterName(const char** regName,
EDDisassemblerRef disassembler,
unsigned regID) {
- const char* name = disassembler->nameWithRegisterID(regID);
+ const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID);
if (!name)
return -1;
*regName = name;
int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
unsigned regID) {
- return disassembler->registerIsStackPointer(regID) ? 1 : 0;
+ return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0;
}
int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
unsigned regID) {
- return disassembler->registerIsProgramCounter(regID) ? 1 : 0;
+ return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0;
}
unsigned int EDCreateInsts(EDInstRef *insts,
unsigned int count,
EDDisassemblerRef disassembler,
- EDByteReaderCallback byteReader,
+ ::EDByteReaderCallback byteReader,
uint64_t address,
void *arg) {
unsigned int index;
for (index = 0; index < count; ++index) {
- EDInst *inst = disassembler->createInst(byteReader, address, arg);
+ EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader,
+ address, arg);
if (!inst)
return index;
}
void EDReleaseInst(EDInstRef inst) {
- delete inst;
+ delete ((EDInst*)inst);
}
int EDInstByteSize(EDInstRef inst) {
- return inst->byteSize();
+ return ((EDInst*)inst)->byteSize();
}
int EDGetInstString(const char **buf,
EDInstRef inst) {
- return inst->getString(*buf);
+ return ((EDInst*)inst)->getString(*buf);
}
int EDInstID(unsigned *instID, EDInstRef inst) {
- *instID = inst->instID();
+ *instID = ((EDInst*)inst)->instID();
return 0;
}
int EDInstIsBranch(EDInstRef inst) {
- return inst->isBranch();
+ return ((EDInst*)inst)->isBranch();
}
int EDInstIsMove(EDInstRef inst) {
- return inst->isMove();
+ return ((EDInst*)inst)->isMove();
}
int EDBranchTargetID(EDInstRef inst) {
- return inst->branchTargetID();
+ return ((EDInst*)inst)->branchTargetID();
}
int EDMoveSourceID(EDInstRef inst) {
- return inst->moveSourceID();
+ return ((EDInst*)inst)->moveSourceID();
}
int EDMoveTargetID(EDInstRef inst) {
- return inst->moveTargetID();
+ return ((EDInst*)inst)->moveTargetID();
}
int EDNumTokens(EDInstRef inst) {
- return inst->numTokens();
+ return ((EDInst*)inst)->numTokens();
}
int EDGetToken(EDTokenRef *token,
EDInstRef inst,
int index) {
- return inst->getToken(*token, index);
+ return ((EDInst*)inst)->getToken(*(EDToken**)token, index);
}
int EDGetTokenString(const char **buf,
EDTokenRef token) {
- return token->getString(*buf);
+ return ((EDToken*)token)->getString(*buf);
}
int EDOperandIndexForToken(EDTokenRef token) {
- return token->operandID();
+ return ((EDToken*)token)->operandID();
}
int EDTokenIsWhitespace(EDTokenRef token) {
- if (token->type() == EDToken::kTokenWhitespace)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenWhitespace;
}
int EDTokenIsPunctuation(EDTokenRef token) {
- if (token->type() == EDToken::kTokenPunctuation)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenPunctuation;
}
int EDTokenIsOpcode(EDTokenRef token) {
- if (token->type() == EDToken::kTokenOpcode)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenOpcode;
}
int EDTokenIsLiteral(EDTokenRef token) {
- if (token->type() == EDToken::kTokenLiteral)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenLiteral;
}
int EDTokenIsRegister(EDTokenRef token) {
- if (token->type() == EDToken::kTokenRegister)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenRegister;
}
int EDTokenIsNegativeLiteral(EDTokenRef token) {
- if (token->type() != EDToken::kTokenLiteral)
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
return -1;
- return token->literalSign();
+ return ((EDToken*)token)->literalSign();
}
-int EDLiteralTokenAbsoluteValue(uint64_t *value,
- EDTokenRef token) {
- if (token->type() != EDToken::kTokenLiteral)
+int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
return -1;
- return token->literalAbsoluteValue(*value);
+ return ((EDToken*)token)->literalAbsoluteValue(*value);
}
int EDRegisterTokenValue(unsigned *registerID,
EDTokenRef token) {
- if (token->type() != EDToken::kTokenRegister)
+ if (((EDToken*)token)->type() != EDToken::kTokenRegister)
return -1;
- return token->registerID(*registerID);
+ return ((EDToken*)token)->registerID(*registerID);
}
int EDNumOperands(EDInstRef inst) {
- return inst->numOperands();
+ return ((EDInst*)inst)->numOperands();
}
int EDGetOperand(EDOperandRef *operand,
EDInstRef inst,
int index) {
- return inst->getOperand(*operand, index);
+ return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index);
}
int EDOperandIsRegister(EDOperandRef operand) {
- return operand->isRegister();
+ return ((EDOperand*)operand)->isRegister();
}
int EDOperandIsImmediate(EDOperandRef operand) {
- return operand->isImmediate();
+ return ((EDOperand*)operand)->isImmediate();
}
int EDOperandIsMemory(EDOperandRef operand) {
- return operand->isMemory();
+ return ((EDOperand*)operand)->isMemory();
}
-int EDRegisterOperandValue(unsigned *value,
- EDOperandRef operand) {
- if (!operand->isRegister())
+int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isRegister())
return -1;
- *value = operand->regVal();
+ *value = ((EDOperand*)operand)->regVal();
return 0;
}
-int EDImmediateOperandValue(uint64_t *value,
- EDOperandRef operand) {
- if (!operand->isImmediate())
+int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isImmediate())
return -1;
- *value = operand->immediateVal();
+ *value = ((EDOperand*)operand)->immediateVal();
return 0;
}
-int EDEvaluateOperand(uint64_t *result,
- EDOperandRef operand,
- EDRegisterReaderCallback regReader,
- void *arg) {
- return operand->evaluate(*result, regReader, arg);
+int EDEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ ::EDRegisterReaderCallback regReader, void *arg) {
+ return ((EDOperand*)operand)->evaluate(*result, regReader, arg);
}
#ifdef __BLOCKS__
(void*)&wrapper);
}
-int EDBlockEvaluateOperand(uint64_t *result,
- EDOperandRef operand,
+int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand,
EDRegisterBlock_t regBlock) {
- return operand->evaluate(*result, regBlock);
+ return ((EDOperand*)operand)->evaluate(*result, regBlock);
}
-int EDBlockVisitTokens(EDInstRef inst,
- EDTokenVisitor_t visitor) {
- return inst->visitTokens(visitor);
+int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) {
+ return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor);
}
#else
+++ /dev/null
-//===-EDOperand.cpp - LLVM Enhanced Disassembler --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembly library's operand class. The
-// operand is responsible for allowing evaluation given a particular register
-// context.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDDisassembler.h"
-#include "EDInst.h"
-#include "EDOperand.h"
-
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-
-using namespace llvm;
-
-EDOperand::EDOperand(const EDDisassembler &disassembler,
- const EDInst &inst,
- unsigned int opIndex,
- unsigned int &mcOpIndex) :
- Disassembler(disassembler),
- Inst(inst),
- OpIndex(opIndex),
- MCOpIndex(mcOpIndex) {
- unsigned int numMCOperands = 0;
-
- if (Disassembler.Key.Arch == Triple::x86 ||
- Disassembler.Key.Arch == Triple::x86_64) {
- uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
-
- switch (operandType) {
- default:
- break;
- case kOperandTypeImmediate:
- numMCOperands = 1;
- break;
- case kOperandTypeRegister:
- numMCOperands = 1;
- break;
- case kOperandTypeX86Memory:
- numMCOperands = 5;
- break;
- case kOperandTypeX86EffectiveAddress:
- numMCOperands = 4;
- break;
- case kOperandTypeX86PCRelative:
- numMCOperands = 1;
- break;
- }
- }
- else if (Disassembler.Key.Arch == Triple::arm ||
- Disassembler.Key.Arch == Triple::thumb) {
- uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
-
- switch (operandType) {
- default:
- case kOperandTypeARMRegisterList:
- break;
- case kOperandTypeImmediate:
- case kOperandTypeRegister:
- case kOperandTypeARMBranchTarget:
- case kOperandTypeARMSoImm:
- case kOperandTypeThumb2SoImm:
- case kOperandTypeARMSoImm2Part:
- case kOperandTypeARMPredicate:
- case kOperandTypeThumbITMask:
- case kOperandTypeThumb2AddrModeImm8Offset:
- case kOperandTypeARMTBAddrMode:
- case kOperandTypeThumb2AddrModeImm8s4Offset:
- numMCOperands = 1;
- break;
- case kOperandTypeThumb2SoReg:
- case kOperandTypeARMAddrMode2Offset:
- case kOperandTypeARMAddrMode3Offset:
- case kOperandTypeARMAddrMode4:
- case kOperandTypeARMAddrMode5:
- case kOperandTypeARMAddrModePC:
- case kOperandTypeThumb2AddrModeImm8:
- case kOperandTypeThumb2AddrModeImm12:
- case kOperandTypeThumb2AddrModeImm8s4:
- case kOperandTypeThumbAddrModeRR:
- case kOperandTypeThumbAddrModeSP:
- numMCOperands = 2;
- break;
- case kOperandTypeARMSoReg:
- case kOperandTypeARMAddrMode2:
- case kOperandTypeARMAddrMode3:
- case kOperandTypeThumb2AddrModeSoReg:
- case kOperandTypeThumbAddrModeS1:
- case kOperandTypeThumbAddrModeS2:
- case kOperandTypeThumbAddrModeS4:
- case kOperandTypeARMAddrMode6Offset:
- numMCOperands = 3;
- break;
- case kOperandTypeARMAddrMode6:
- numMCOperands = 4;
- break;
- }
- }
-
- mcOpIndex += numMCOperands;
-}
-
-EDOperand::~EDOperand() {
-}
-
-int EDOperand::evaluate(uint64_t &result,
- EDRegisterReaderCallback callback,
- void *arg) {
- uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
-
- switch (Disassembler.Key.Arch) {
- default:
- return -1;
- case Triple::x86:
- case Triple::x86_64:
- switch (operandType) {
- default:
- return -1;
- case kOperandTypeImmediate:
- result = Inst.Inst->getOperand(MCOpIndex).getImm();
- return 0;
- case kOperandTypeRegister:
- {
- unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
- return callback(&result, reg, arg);
- }
- case kOperandTypeX86PCRelative:
- {
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
-
- uint64_t ripVal;
-
- // TODO fix how we do this
-
- if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
- return -1;
-
- result = ripVal + displacement;
- return 0;
- }
- case kOperandTypeX86Memory:
- case kOperandTypeX86EffectiveAddress:
- {
- unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
- uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
- unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
- //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
-
- uint64_t addr = 0;
-
- if (baseReg) {
- uint64_t baseVal;
- if (callback(&baseVal, baseReg, arg))
- return -1;
- addr += baseVal;
- }
-
- if (indexReg) {
- uint64_t indexVal;
- if (callback(&indexVal, indexReg, arg))
- return -1;
- addr += (scaleAmount * indexVal);
- }
-
- addr += displacement;
-
- result = addr;
- return 0;
- }
- }
- break;
- case Triple::arm:
- case Triple::thumb:
- switch (operandType) {
- default:
- return -1;
- case kOperandTypeImmediate:
- result = Inst.Inst->getOperand(MCOpIndex).getImm();
- return 0;
- case kOperandTypeRegister:
- {
- unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
- return callback(&result, reg, arg);
- }
- case kOperandTypeARMBranchTarget:
- {
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
-
- uint64_t pcVal;
-
- if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
- return -1;
-
- result = pcVal + displacement;
- return 0;
- }
- }
- }
-
- return -1;
-}
-
-int EDOperand::isRegister() {
- return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeRegister);
-}
-
-unsigned EDOperand::regVal() {
- return Inst.Inst->getOperand(MCOpIndex).getReg();
-}
-
-int EDOperand::isImmediate() {
- return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeImmediate);
-}
-
-uint64_t EDOperand::immediateVal() {
- return Inst.Inst->getOperand(MCOpIndex).getImm();
-}
-
-int EDOperand::isMemory() {
- uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
-
- switch (operandType) {
- default:
- return 0;
- case kOperandTypeX86Memory:
- case kOperandTypeX86PCRelative:
- case kOperandTypeX86EffectiveAddress:
- case kOperandTypeARMSoReg:
- case kOperandTypeARMSoImm:
- case kOperandTypeARMAddrMode2:
- case kOperandTypeARMAddrMode2Offset:
- case kOperandTypeARMAddrMode3:
- case kOperandTypeARMAddrMode3Offset:
- case kOperandTypeARMAddrMode4:
- case kOperandTypeARMAddrMode5:
- case kOperandTypeARMAddrMode6:
- case kOperandTypeARMAddrModePC:
- case kOperandTypeARMBranchTarget:
- case kOperandTypeThumbAddrModeS1:
- case kOperandTypeThumbAddrModeS2:
- case kOperandTypeThumbAddrModeS4:
- case kOperandTypeThumbAddrModeRR:
- case kOperandTypeThumbAddrModeSP:
- case kOperandTypeThumb2SoImm:
- case kOperandTypeThumb2AddrModeImm8:
- case kOperandTypeThumb2AddrModeImm8Offset:
- case kOperandTypeThumb2AddrModeImm12:
- case kOperandTypeThumb2AddrModeSoReg:
- case kOperandTypeThumb2AddrModeImm8s4:
- return 1;
- }
-}
-
-#ifdef __BLOCKS__
-struct RegisterReaderWrapper {
- EDRegisterBlock_t regBlock;
-};
-
-int readerWrapperCallback(uint64_t *value,
- unsigned regID,
- void *arg) {
- struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg;
- return wrapper->regBlock(value, regID);
-}
-
-int EDOperand::evaluate(uint64_t &result,
- EDRegisterBlock_t regBlock) {
- struct RegisterReaderWrapper wrapper;
- wrapper.regBlock = regBlock;
- return evaluate(result,
- readerWrapperCallback,
- (void*)&wrapper);
-}
-#endif
+++ /dev/null
-//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's
-// operand class. The operand is responsible for allowing evaluation given a
-// particular register context.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef EDOperand_
-#define EDOperand_
-
-#include "llvm-c/EnhancedDisassembly.h"
-
-/// EDOperand - Encapsulates a single operand, which can be evaluated by the
-/// client
-struct EDOperand {
- /// The parent disassembler
- const EDDisassembler &Disassembler;
- /// The parent instruction
- const EDInst &Inst;
-
- /// The index of the operand in the EDInst
- unsigned int OpIndex;
- /// The index of the first component of the operand in the MCInst
- unsigned int MCOpIndex;
-
- /// Constructor - Initializes an EDOperand
- ///
- /// @arg disassembler - The disassembler responsible for the operand
- /// @arg inst - The instruction containing this operand
- /// @arg opIndex - The index of the operand in inst
- /// @arg mcOpIndex - The index of the operand in the original MCInst
- EDOperand(const EDDisassembler &disassembler,
- const EDInst &inst,
- unsigned int opIndex,
- unsigned int &mcOpIndex);
- ~EDOperand();
-
- /// evaluate - Returns the numeric value of an operand to the extent possible,
- /// returning 0 on success or -1 if there was some problem (such as a
- /// register not being readable)
- ///
- /// @arg result - A reference whose target is filled in with the value of
- /// the operand (the address if it is a memory operand)
- /// @arg callback - A function to call to obtain register values
- /// @arg arg - An opaque argument to pass to callback
- int evaluate(uint64_t &result,
- EDRegisterReaderCallback callback,
- void *arg);
-
- /// isRegister - Returns 1 if the operand is a register or 0 otherwise
- int isRegister();
- /// regVal - Returns the register value.
- unsigned regVal();
-
- /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise
- int isImmediate();
- /// immediateVal - Returns the immediate value.
- uint64_t immediateVal();
-
- /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise
- int isMemory();
-
-#ifdef __BLOCKS__
- /// evaluate - Like evaluate for a callback, but uses a block instead
- int evaluate(uint64_t &result,
- EDRegisterBlock_t regBlock);
-#endif
-};
-
-#endif
+++ /dev/null
-//===-EDToken.cpp - LLVM Enhanced Disassembler ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembler library's token class. The
-// token is responsible for vending information about the token, such as its
-// type and logical value.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDDisassembler.h"
-#include "EDToken.h"
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-
-using namespace llvm;
-
-EDToken::EDToken(StringRef str,
- enum tokenType type,
- uint64_t localType,
- EDDisassembler &disassembler) :
- Disassembler(disassembler),
- Str(str),
- Type(type),
- LocalType(localType),
- OperandID(-1) {
-}
-
-EDToken::~EDToken() {
-}
-
-void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
- Type = kTokenLiteral;
- LiteralSign = sign;
- LiteralAbsoluteValue = absoluteValue;
-}
-
-void EDToken::makeRegister(unsigned registerID) {
- Type = kTokenRegister;
- RegisterID = registerID;
-}
-
-void EDToken::setOperandID(int operandID) {
- OperandID = operandID;
-}
-
-enum EDToken::tokenType EDToken::type() const {
- return Type;
-}
-
-uint64_t EDToken::localType() const {
- return LocalType;
-}
-
-StringRef EDToken::string() const {
- return Str;
-}
-
-int EDToken::operandID() const {
- return OperandID;
-}
-
-int EDToken::literalSign() const {
- if (Type != kTokenLiteral)
- return -1;
- return (LiteralSign ? 1 : 0);
-}
-
-int EDToken::literalAbsoluteValue(uint64_t &value) const {
- if (Type != kTokenLiteral)
- return -1;
- value = LiteralAbsoluteValue;
- return 0;
-}
-
-int EDToken::registerID(unsigned ®isterID) const {
- if (Type != kTokenRegister)
- return -1;
- registerID = RegisterID;
- return 0;
-}
-
-int EDToken::tokenize(std::vector<EDToken*> &tokens,
- std::string &str,
- const char *operandOrder,
- EDDisassembler &disassembler) {
- SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
- SmallVector<AsmToken, 10> asmTokens;
-
- if (disassembler.parseInst(parsedOperands, asmTokens, str))
- return -1;
-
- SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
- unsigned int operandIndex;
- SmallVectorImpl<AsmToken>::iterator tokenIterator;
-
- operandIterator = parsedOperands.begin();
- operandIndex = 0;
-
- bool readOpcode = false;
-
- const char *wsPointer = asmTokens.begin()->getLoc().getPointer();
-
- for (tokenIterator = asmTokens.begin();
- tokenIterator != asmTokens.end();
- ++tokenIterator) {
- SMLoc tokenLoc = tokenIterator->getLoc();
-
- const char *tokenPointer = tokenLoc.getPointer();
-
- if (tokenPointer > wsPointer) {
- unsigned long wsLength = tokenPointer - wsPointer;
-
- EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
- EDToken::kTokenWhitespace,
- 0,
- disassembler);
-
- tokens.push_back(whitespaceToken);
- }
-
- wsPointer = tokenPointer + tokenIterator->getString().size();
-
- while (operandIterator != parsedOperands.end() &&
- tokenLoc.getPointer() >
- (*operandIterator)->getEndLoc().getPointer()) {
- ++operandIterator;
- ++operandIndex;
- }
-
- EDToken *token;
-
- switch (tokenIterator->getKind()) {
- case AsmToken::Identifier:
- if (!readOpcode) {
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenOpcode,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
- readOpcode = true;
- break;
- }
- // any identifier that isn't an opcode is mere punctuation; so we fall
- // through
- default:
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenPunctuation,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
- break;
- case AsmToken::Integer:
- {
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenLiteral,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
-
- int64_t intVal = tokenIterator->getIntVal();
-
- if (intVal < 0)
- token->makeLiteral(true, -intVal);
- else
- token->makeLiteral(false, intVal);
- break;
- }
- case AsmToken::Register:
- {
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenLiteral,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
-
- token->makeRegister((unsigned)tokenIterator->getRegVal());
- break;
- }
- }
-
- if (operandIterator != parsedOperands.end() &&
- tokenLoc.getPointer() >=
- (*operandIterator)->getStartLoc().getPointer()) {
- /// operandIndex == 0 means the operand is the instruction (which the
- /// AsmParser treats as an operand but edis does not). We therefore skip
- /// operandIndex == 0 and subtract 1 from all other operand indices.
-
- if (operandIndex > 0)
- token->setOperandID(operandOrder[operandIndex - 1]);
- }
-
- tokens.push_back(token);
- }
-
- return 0;
-}
-
-int EDToken::getString(const char*& buf) {
- if (PermStr.length() == 0) {
- PermStr = Str.str();
- }
- buf = PermStr.c_str();
- return 0;
-}
+++ /dev/null
-//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's token
-// class. The token is responsible for vending information about the token,
-// such as its type and logical value.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef EDToken_
-#define EDToken_
-
-#include "llvm-c/EnhancedDisassembly.h"
-#include "llvm/ADT/StringRef.h"
-
-#include <string>
-#include <vector>
-
-/// EDToken - Encapsulates a single token, which can provide a string
-/// representation of itself or interpret itself in various ways, depending
-/// on the token type.
-struct EDToken {
- enum tokenType {
- kTokenWhitespace,
- kTokenOpcode,
- kTokenLiteral,
- kTokenRegister,
- kTokenPunctuation
- };
-
- /// The parent disassembler
- EDDisassembler &Disassembler;
-
- /// The token's string representation
- llvm::StringRef Str;
- /// The token's string representation, but in a form suitable for export
- std::string PermStr;
- /// The type of the token, as exposed through the external API
- enum tokenType Type;
- /// The type of the token, as recorded by the syntax-specific tokenizer
- uint64_t LocalType;
- /// The operand corresponding to the token, or (unsigned int)-1 if not
- /// part of an operand.
- int OperandID;
-
- /// The sign if the token is a literal (1 if negative, 0 otherwise)
- bool LiteralSign;
- /// The absolute value if the token is a literal
- uint64_t LiteralAbsoluteValue;
- /// The LLVM register ID if the token is a register name
- unsigned RegisterID;
-
- /// Constructor - Initializes an EDToken with the information common to all
- /// tokens
- ///
- /// @arg str - The string corresponding to the token
- /// @arg type - The token's type as exposed through the public API
- /// @arg localType - The token's type as recorded by the tokenizer
- /// @arg disassembler - The disassembler responsible for the token
- EDToken(llvm::StringRef str,
- enum tokenType type,
- uint64_t localType,
- EDDisassembler &disassembler);
-
- /// makeLiteral - Adds the information specific to a literal
- /// @arg sign - The sign of the literal (1 if negative, 0
- /// otherwise)
- ///
- /// @arg absoluteValue - The absolute value of the literal
- void makeLiteral(bool sign, uint64_t absoluteValue);
- /// makeRegister - Adds the information specific to a register
- ///
- /// @arg registerID - The LLVM register ID
- void makeRegister(unsigned registerID);
-
- /// setOperandID - Links the token to a numbered operand
- ///
- /// @arg operandID - The operand ID to link to
- void setOperandID(int operandID);
-
- ~EDToken();
-
- /// type - Returns the public type of the token
- enum tokenType type() const;
- /// localType - Returns the tokenizer-specific type of the token
- uint64_t localType() const;
- /// string - Returns the string representation of the token
- llvm::StringRef string() const;
- /// operandID - Returns the operand ID of the token
- int operandID() const;
-
- /// literalSign - Returns the sign of the token
- /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
- int literalSign() const;
- /// literalAbsoluteValue - Retrieves the absolute value of the token, and
- /// returns -1 if the token is not a literal
- /// @arg value - A reference to a value that is filled in with the absolute
- /// value, if it is valid
- int literalAbsoluteValue(uint64_t &value) const;
- /// registerID - Retrieves the register ID of the token, and returns -1 if the
- /// token is not a register
- ///
- /// @arg registerID - A reference to a value that is filled in with the
- /// register ID, if it is valid
- int registerID(unsigned ®isterID) const;
-
- /// tokenize - Tokenizes a string using the platform- and syntax-specific
- /// tokenizer, and returns 0 on success (-1 on failure)
- ///
- /// @arg tokens - A vector that will be filled in with pointers to
- /// allocated tokens
- /// @arg str - The string, as outputted by the AsmPrinter
- /// @arg operandOrder - The order of the operands from the operandFlags array
- /// as they appear in str
- /// @arg disassembler - The disassembler for the desired target and
- // assembly syntax
- static int tokenize(std::vector<EDToken*> &tokens,
- std::string &str,
- const char *operandOrder,
- EDDisassembler &disassembler);
-
- /// getString - Directs a character pointer to the string, returning 0 on
- /// success (-1 on failure)
- /// @arg buf - A reference to a pointer that is set to point to the string.
- /// The string is still owned by the token.
- int getString(const char*& buf);
-};
-
-#endif
-set( LLVM_USED_LIBS EnhancedDisassembly)
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser MCDisassembler)
add_llvm_tool(llvm-mc
llvm-mc.cpp
//===----------------------------------------------------------------------===//
#include "Disassembler.h"
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
+#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
+#include "../../lib/MC/MCDisassembler/EDInst.h"
+#include "../../lib/MC/MCDisassembler/EDOperand.h"
+#include "../../lib/MC/MCDisassembler/EDToken.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
-
-#include "llvm-c/EnhancedDisassembly.h"
-
using namespace llvm;
typedef std::vector<std::pair<unsigned char, const char*> > ByteArrayTy;
}
static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) {
- EDDisassemblerRef &disassembler = *((EDDisassemblerRef*)Arg);
-
- const char *regName;
+ EDDisassembler &disassembler = *((EDDisassembler *)Arg);
- if (!EDGetRegisterName(®Name,
- disassembler,
- R))
+ if (const char *regName = disassembler.nameWithRegisterID(R))
outs() << "[" << regName << "/" << R << "]";
- if (EDRegisterIsStackPointer(disassembler, R))
+
+ if (disassembler.registerIsStackPointer(R))
outs() << "(sp)";
- if (EDRegisterIsProgramCounter(disassembler, R))
+ if (disassembler.registerIsProgramCounter(R))
outs() << "(pc)";
*V = 0;
-
return 0;
}
return -1;
}
- EDDisassemblerRef disassembler;
-
Triple T(TS);
- EDAssemblySyntax_t AS;
+ EDDisassembler::AssemblySyntax AS;
switch (T.getArch()) {
default:
return -1;
case Triple::arm:
case Triple::thumb:
- AS = kEDAssemblySyntaxARMUAL;
+ AS = EDDisassembler::kEDAssemblySyntaxARMUAL;
break;
case Triple::x86:
case Triple::x86_64:
- AS = kEDAssemblySyntaxX86ATT;
+ AS = EDDisassembler::kEDAssemblySyntaxX86ATT;
break;
}
- if (EDGetDisassembler(&disassembler,
- TS.c_str(),
- AS)) {
- errs() << "error: couldn't get disassembler for " << TS.c_str() << "\n";
+ EDDisassembler::initialize();
+ EDDisassembler *disassembler =
+ EDDisassembler::getDisassembler(TS.c_str(), AS);
+
+ if (disassembler == 0) {
+ errs() << "error: couldn't get disassembler for " << TS << '\n';
return -1;
}
- EDInstRef inst;
-
- if (EDCreateInsts(&inst, 1, disassembler, byteArrayReader, 0,&ByteArray)
- != 1) {
+ EDInst *inst =
+ disassembler->createInst(byteArrayReader, 0, &ByteArray);
+
+ if (inst == 0) {
errs() << "error: Didn't get an instruction\n";
return -1;
}
- int numTokens = EDNumTokens(inst);
-
- if (numTokens < 0) {
- errs() << "error: Couldn't count the instruction's tokens\n";
+ unsigned numTokens = inst->numTokens();
+ if ((int)numTokens < 0) {
+ errs() << "error: couldn't count the instruction's tokens\n";
return -1;
}
- int tokenIndex;
-
- for (tokenIndex = 0; tokenIndex < numTokens; ++tokenIndex) {
- EDTokenRef token;
+ for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) {
+ EDToken *token;
- if (EDGetToken(&token, inst, tokenIndex)) {
+ if (inst->getToken(token, tokenIndex)) {
errs() << "error: Couldn't get token\n";
return -1;
}
const char *buf;
-
- if (EDGetTokenString(&buf, token)) {
+ if (token->getString(buf)) {
errs() << "error: Couldn't get string for token\n";
return -1;
}
- outs() << "[";
-
- int operandIndex = EDOperandIndexForToken(token);
+ outs() << '[';
+ int operandIndex = token->operandID();
if (operandIndex >= 0)
outs() << operandIndex << "-";
- if (EDTokenIsWhitespace(token)) {
- outs() << "w";
- } else if (EDTokenIsPunctuation(token)) {
- outs() << "p";
- } else if (EDTokenIsOpcode(token)) {
- outs() << "o";
- } else if (EDTokenIsLiteral(token)) {
- outs() << "l";
- } else if (EDTokenIsRegister(token)) {
- outs() << "r";
- } else {
- outs() << "?";
+ switch (token->type()) {
+ default: outs() << "?"; break;
+ case EDToken::kTokenWhitespace: outs() << "w"; break;
+ case EDToken::kTokenPunctuation: outs() << "p"; break;
+ case EDToken::kTokenOpcode: outs() << "o"; break;
+ case EDToken::kTokenLiteral: outs() << "l"; break;
+ case EDToken::kTokenRegister: outs() << "r"; break;
}
outs() << ":" << buf;
- if (EDTokenIsLiteral(token)) {
+ if (token->type() == EDToken::kTokenLiteral) {
outs() << "=";
- if (EDTokenIsNegativeLiteral(token))
+ if (token->literalSign())
outs() << "-";
uint64_t absoluteValue;
- if (EDLiteralTokenAbsoluteValue(&absoluteValue, token)) {
+ if (token->literalAbsoluteValue(absoluteValue)) {
errs() << "error: Couldn't get the value of a literal token\n";
return -1;
}
outs() << absoluteValue;
- } else if (EDTokenIsRegister(token)) {
+ } else if (token->type() == EDToken::kTokenRegister) {
outs() << "=";
unsigned regID;
- if (EDRegisterTokenValue(®ID, token)) {
+ if (token->registerID(regID)) {
errs() << "error: Couldn't get the ID of a register token\n";
return -1;
}
outs() << " ";
- if (EDInstIsBranch(inst))
+ if (inst->isBranch())
outs() << "<br> ";
- if (EDInstIsMove(inst))
+ if (inst->isMove())
outs() << "<mov> ";
- int numOperands = EDNumOperands(inst);
+ unsigned numOperands = inst->numOperands();
- if (numOperands < 0) {
+ if ((int)numOperands < 0) {
errs() << "error: Couldn't count operands\n";
return -1;
}
- int operandIndex;
-
- for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
+ for (unsigned operandIndex = 0; operandIndex != numOperands; ++operandIndex) {
outs() << operandIndex << ":";
- EDOperandRef operand;
-
- if (EDGetOperand(&operand,
- inst,
- operandIndex)) {
- errs() << "error: Couldn't get operand\n";
+ EDOperand *operand;
+ if (inst->getOperand(operand, operandIndex)) {
+ errs() << "error: couldn't get operand\n";
return -1;
}
uint64_t evaluatedResult;
-
- EDEvaluateOperand(&evaluatedResult,
- operand,
- verboseEvaluator,
- &disassembler);
-
- outs() << "=" << evaluatedResult;
-
- outs() << " ";
+ evaluatedResult = operand->evaluate(evaluatedResult, verboseEvaluator,
+ disassembler);
+ outs() << "=" << evaluatedResult << " ";
}
- outs() << "\n";
+ outs() << '\n';
return 0;
}
# early so we can set up LINK_COMPONENTS before including Makefile.rules
include $(LEVEL)/Makefile.config
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCParser MC support
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCDisassembler MCParser MC support
include $(LLVM_SRC_ROOT)/Makefile.rules
-# Using LIBS instead of USEDLIBS to force static linking
-LIBS += $(LLVMLibDir)/libEnhancedDisassembly.a