X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FAsmParser%2FARMAsmParser.cpp;h=12225b00ed0556c03541b6c73900be24f6f1253e;hb=5fa22a19750c082ff161db1702ebe96dd2a787e7;hp=4574607c93f3089f705a3b542ebd2a66edcdad2d;hpb=a7ba3a81c008142a91d799e2ec3152cfd6bbb15f;p=oota-llvm.git diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4574607c93f..12225b00ed0 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -8,21 +8,24 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmLexer.h" -#include "llvm/MC/MCAsmParser.h" +#include "ARMAddressingModes.h" +#include "ARMSubtarget.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmParser.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" using namespace llvm; -namespace { -struct ARMOperand; - // The shift types for register controlled shifts in arm memory addressing enum ShiftType { Lsl, @@ -32,67 +35,88 @@ enum ShiftType { Rrx }; +namespace { + +class ARMOperand; + class ARMAsmParser : public TargetAsmParser { MCAsmParser &Parser; + TargetMachine &TM; -private: MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - bool ParseRegister(ARMOperand &Op); - - bool ParseMemory(ARMOperand &Op); - - bool ParseShift(enum ShiftType *St, const MCExpr *ShiftAmount); - - bool ParseOperand(ARMOperand &Op); - + int TryParseRegister(); + ARMOperand *TryParseRegisterWithWriteBack(); + ARMOperand *ParseRegisterList(); + ARMOperand *ParseMemory(); + ARMOperand *ParseOperand(); + + bool ParseMemoryOffsetReg(bool &Negative, + bool &OffsetRegShifted, + enum ShiftType &ShiftType, + const MCExpr *&ShiftAmount, + const MCExpr *&Offset, + bool &OffsetIsReg, + int &OffsetRegNum, + SMLoc &E); + bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveThumb(SMLoc L); + bool ParseDirectiveThumbFunc(SMLoc L); + bool ParseDirectiveCode(SMLoc L); + bool ParseDirectiveSyntax(SMLoc L); - // TODO - For now hacked versions of the next two are in here in this file to - // allow some parser testing until the table gen versions are implemented. + bool MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl &Operands, + MCStreamer &Out); /// @name Auto-generated Match Functions /// { - bool MatchInstruction(SmallVectorImpl &Operands, - MCInst &Inst); - /// MatchRegisterName - Match the given string to a register name, or 0 if - /// there is no match. - unsigned MatchRegisterName(const StringRef &Name); +#define GET_ASSEMBLER_HEADER +#include "ARMGenAsmMatcher.inc" /// } - public: - ARMAsmParser(const Target &T, MCAsmParser &_Parser) - : TargetAsmParser(T), Parser(_Parser) {} - - virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) + : TargetAsmParser(T), Parser(_Parser), TM(_TM) { + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures( + &TM.getSubtarget())); + } + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; - } // end anonymous namespace namespace { /// ARMOperand - Instances of this class represent a parsed ARM machine /// instruction. -struct ARMOperand { - enum { - Token, +class ARMOperand : public MCParsedAsmOperand { + enum KindTy { + CondCode, + Immediate, + Memory, Register, - Memory + RegisterList, + Token } Kind; + SMLoc StartLoc, EndLoc; union { + struct { + ARMCC::CondCodes Val; + } CC; + struct { const char *Data; unsigned Length; @@ -100,25 +124,75 @@ struct ARMOperand { struct { unsigned RegNum; + bool Writeback; } Reg; + struct { + SmallVector *Registers; + } RegList; + + struct { + const MCExpr *Val; + } Imm; + // This is for all forms of ARM address expressions struct { unsigned BaseRegNum; - bool OffsetIsReg; - const MCExpr *Offset; // used when OffsetIsReg is false - unsigned OffsetRegNum; // used when OffsetIsReg is true - bool OffsetRegShifted; // only used when OffsetIsReg is true - enum ShiftType ShiftType; // used when OffsetRegShifted is true - const MCExpr *ShiftAmount; // used when OffsetRegShifted is true - bool Preindexed; - bool Postindexed; - bool Negative; // only used when OffsetIsReg is true - bool Writeback; + unsigned OffsetRegNum; // used when OffsetIsReg is true + const MCExpr *Offset; // used when OffsetIsReg is false + const MCExpr *ShiftAmount; // used when OffsetRegShifted is true + enum ShiftType ShiftType; // used when OffsetRegShifted is true + unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true + unsigned Preindexed : 1; + unsigned Postindexed : 1; + unsigned OffsetIsReg : 1; + unsigned Negative : 1; // only used when OffsetIsReg is true + unsigned Writeback : 1; } Mem; - }; + ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + switch (Kind) { + case CondCode: + CC = o.CC; + break; + case Token: + Tok = o.Tok; + break; + case Register: + Reg = o.Reg; + break; + case RegisterList: + RegList = o.RegList; + break; + case Immediate: + Imm = o.Imm; + break; + case Memory: + Mem = o.Mem; + break; + } + } + ~ARMOperand() { + if (isRegList()) + delete RegList.Registers; + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + ARMCC::CondCodes getCondCode() const { + assert(Kind == CondCode && "Invalid access!"); + return CC.Val; + } + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); @@ -129,92 +203,359 @@ struct ARMOperand { return Reg.RegNum; } - bool isToken() const {return Kind == Token; } + const SmallVectorImpl &getRegList() const { + assert(Kind == RegisterList && "Invalid access!"); + return *RegList.Registers; + } + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + bool isCondCode() const { return Kind == CondCode; } + bool isImm() const { return Kind == Immediate; } bool isReg() const { return Kind == Register; } + bool isRegList() const { return Kind == RegisterList; } + bool isToken() const { return Kind == Token; } + bool isMemory() const { return Kind == Memory; } + bool isMemMode5() const { + if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted || + Mem.Writeback || Mem.Negative) + return false; + // If there is an offset expression, make sure it's valid. + if (!Mem.Offset) + return true; + const MCConstantExpr *CE = dyn_cast(Mem.Offset); + if (!CE) + return false; + // The offset must be a multiple of 4 in the range 0-1020. + int64_t Value = CE->getValue(); + return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); + } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. Null MCExpr = 0. + if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); + // FIXME: What belongs here? + Inst.addOperand(MCOperand::CreateReg(0)); + } void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); } - static ARMOperand CreateToken(StringRef Str) { - ARMOperand Res; - Res.Kind = Token; - Res.Tok.Data = Str.data(); - Res.Tok.Length = Str.size(); - return Res; - } - - static ARMOperand CreateReg(unsigned RegNum) { - ARMOperand Res; - Res.Kind = Register; - Res.Reg.RegNum = RegNum; - return Res; - } - - static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg, - const MCExpr *Offset, unsigned OffsetRegNum, - bool OffsetRegShifted, enum ShiftType ShiftType, - const MCExpr *ShiftAmount, bool Preindexed, - bool Postindexed, bool Negative, bool Writeback) { - ARMOperand Res; - Res.Kind = Memory; - Res.Mem.BaseRegNum = BaseRegNum; - Res.Mem.OffsetIsReg = OffsetIsReg; - Res.Mem.Offset = Offset; - Res.Mem.OffsetRegNum = OffsetRegNum; - Res.Mem.OffsetRegShifted = OffsetRegShifted; - Res.Mem.ShiftType = ShiftType; - Res.Mem.ShiftAmount = ShiftAmount; - Res.Mem.Preindexed = Preindexed; - Res.Mem.Postindexed = Postindexed; - Res.Mem.Negative = Negative; - Res.Mem.Writeback = Writeback; - return Res; + void addRegListOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const SmallVectorImpl &RegList = getRegList(); + for (SmallVectorImpl::const_iterator + I = RegList.begin(), E = RegList.end(); I != E; ++I) + Inst.addOperand(MCOperand::CreateReg(*I)); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addMemMode5Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && isMemMode5() && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); + assert(!Mem.OffsetIsReg && "Invalid mode 5 operand"); + + // FIXME: #-0 is encoded differently than #0. Does the parser preserve + // the difference? + if (Mem.Offset) { + const MCConstantExpr *CE = dyn_cast(Mem.Offset); + assert(CE && "Non-constant mode 5 offset operand!"); + + // The MCInst offset operand doesn't include the low two bits (like + // the instruction encoding). + int64_t Offset = CE->getValue() / 4; + if (Offset >= 0) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, + Offset))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, + -Offset))); + } else { + Inst.addOperand(MCOperand::CreateImm(0)); + } + } + + virtual void dump(raw_ostream &OS) const; + + static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { + ARMOperand *Op = new ARMOperand(CondCode); + Op->CC.Val = CC; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateToken(StringRef Str, SMLoc S) { + ARMOperand *Op = new ARMOperand(Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S, + SMLoc E) { + ARMOperand *Op = new ARMOperand(Register); + Op->Reg.RegNum = RegNum; + Op->Reg.Writeback = Writeback; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand * + CreateRegList(const SmallVectorImpl > &Regs, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(RegisterList); + Op->RegList.Registers = new SmallVector(); + for (SmallVectorImpl >::const_iterator + I = Regs.begin(), E = Regs.end(); I != E; ++I) + Op->RegList.Registers->push_back(I->first); + std::sort(Op->RegList.Registers->begin(), Op->RegList.Registers->end()); + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, + const MCExpr *Offset, unsigned OffsetRegNum, + bool OffsetRegShifted, enum ShiftType ShiftType, + const MCExpr *ShiftAmount, bool Preindexed, + bool Postindexed, bool Negative, bool Writeback, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(Memory); + Op->Mem.BaseRegNum = BaseRegNum; + Op->Mem.OffsetIsReg = OffsetIsReg; + Op->Mem.Offset = Offset; + Op->Mem.OffsetRegNum = OffsetRegNum; + Op->Mem.OffsetRegShifted = OffsetRegShifted; + Op->Mem.ShiftType = ShiftType; + Op->Mem.ShiftAmount = ShiftAmount; + Op->Mem.Preindexed = Preindexed; + Op->Mem.Postindexed = Postindexed; + Op->Mem.Negative = Negative; + Op->Mem.Writeback = Writeback; + + Op->StartLoc = S; + Op->EndLoc = E; + return Op; } }; } // end anonymous namespace. -// Try to parse a register name. The token must be an Identifier when called, -// and if it is a register name a Reg operand is created, the token is eaten -// and false is returned. Else true is returned and no token is eaten. -// TODO this is likely to change to allow different register types and or to -// parse for a specific register type. -bool ARMAsmParser::ParseRegister(ARMOperand &Op) { - const AsmToken &Tok = getLexer().getTok(); +void ARMOperand::dump(raw_ostream &OS) const { + switch (Kind) { + case CondCode: + OS << ARMCondCodeToString(getCondCode()); + break; + case Immediate: + getImm()->print(OS); + break; + case Memory: + OS << ""; + break; + case Register: + OS << ""; + break; + case RegisterList: { + OS << " &RegList = getRegList(); + for (SmallVectorImpl::const_iterator + I = RegList.begin(), E = RegList.end(); I != E; ) { + OS << *I; + if (++I < E) OS << ", "; + } + + OS << ">"; + break; + } + case Token: + OS << "'" << getToken() << "'"; + break; + } +} + +/// @name Auto-generated Match Functions +/// { + +static unsigned MatchRegisterName(StringRef Name); + +/// } + +/// Try to parse a register name. The token must be an Identifier when called, +/// and if it is a register name the token is eaten and the register number is +/// returned. Otherwise return -1. +/// +int ARMAsmParser::TryParseRegister() { + const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. - unsigned RegNum; - - RegNum = MatchRegisterName(Tok.getString()); + unsigned RegNum = MatchRegisterName(Tok.getString()); if (RegNum == 0) - return true; + return -1; + Parser.Lex(); // Eat identifier token. + return RegNum; +} - Op = ARMOperand::CreateReg(RegNum); - getLexer().Lex(); // Eat identifier token. - return false; +/// Try to parse a register name. The token must be an Identifier when called, +/// and if it is a register name the token is eaten and the register number is +/// returned. Otherwise return -1. +/// +/// TODO this is likely to change to allow different register types and or to +/// parse for a specific register type. +ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() { + SMLoc S = Parser.getTok().getLoc(); + int RegNo = TryParseRegister(); + if (RegNo == -1) + return 0; + + SMLoc E = Parser.getTok().getLoc(); + + bool Writeback = false; + const AsmToken &ExclaimTok = Parser.getTok(); + if (ExclaimTok.is(AsmToken::Exclaim)) { + E = ExclaimTok.getLoc(); + Writeback = true; + Parser.Lex(); // Eat exclaim token + } + + return ARMOperand::CreateReg(RegNo, Writeback, S, E); +} + +/// Parse a register list, return it if successful else return null. The first +/// token must be a '{' when called. +ARMOperand *ARMAsmParser::ParseRegisterList() { + assert(Parser.getTok().is(AsmToken::LCurly) && + "Token is not a Left Curly Brace"); + SMLoc S = Parser.getTok().getLoc(); + + // Read the rest of the registers in the list. + unsigned PrevRegNum = 0; + SmallVector, 32> Registers; + + do { + bool IsRange = Parser.getTok().is(AsmToken::Minus); + Parser.Lex(); // Eat non-identifier token. + + const AsmToken &RegTok = Parser.getTok(); + SMLoc RegLoc = RegTok.getLoc(); + if (RegTok.isNot(AsmToken::Identifier)) { + Error(RegLoc, "register expected"); + return 0; + } + + int RegNum = TryParseRegister(); + if (RegNum == -1) { + Error(RegLoc, "register expected"); + return 0; + } + + if (IsRange) { + int Reg = PrevRegNum; + do { + ++Reg; + Registers.push_back(std::make_pair(Reg, RegLoc)); + } while (Reg != RegNum); + } else { + Registers.push_back(std::make_pair(RegNum, RegLoc)); + } + + PrevRegNum = RegNum; + } while (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::Minus)); + + // Process the right curly brace of the list. + const AsmToken &RCurlyTok = Parser.getTok(); + if (RCurlyTok.isNot(AsmToken::RCurly)) { + Error(RCurlyTok.getLoc(), "'}' expected"); + return 0; + } + + SMLoc E = RCurlyTok.getLoc(); + Parser.Lex(); // Eat right curly brace token. + + // Verify the register list. + SmallVectorImpl >::const_iterator + RI = Registers.begin(), RE = Registers.end(); + + unsigned HighRegNum = RI->first; + DenseMap RegMap; + RegMap[RI->first] = true; + + for (++RI; RI != RE; ++RI) { + const std::pair &RegInfo = *RI; + + if (RegMap[RegInfo.first]) { + Error(RegInfo.second, "register duplicated in register list"); + return 0; + } + + if (RegInfo.first < HighRegNum) + Warning(RegInfo.second, + "register not in ascending order in register list"); + + RegMap[RegInfo.first] = true; + HighRegNum = std::max(RegInfo.first, HighRegNum); + } + + return ARMOperand::CreateRegList(Registers, S, E); } -// Try to parse an arm memory expression. It must start with a '[' token. -// TODO Only preindexing and postindexing addressing are started, unindexed -// with option, etc are still to do. -bool ARMAsmParser::ParseMemory(ARMOperand &Op) { - const AsmToken &LBracTok = getLexer().getTok(); - assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket"); - getLexer().Lex(); // Eat left bracket token. - - const AsmToken &BaseRegTok = getLexer().getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) - return Error(BaseRegTok.getLoc(), "register expected"); - unsigned BaseRegNum = MatchRegisterName(BaseRegTok.getString()); - if (BaseRegNum == 0) - return Error(BaseRegTok.getLoc(), "register expected"); - getLexer().Lex(); // Eat identifier token. +/// Parse an ARM memory expression, return false if successful else return true +/// or an error. The first token must be a '[' when called. +/// TODO Only preindexing and postindexing addressing are started, unindexed +/// with option, etc are still to do. +ARMOperand *ARMAsmParser::ParseMemory() { + SMLoc S, E; + assert(Parser.getTok().is(AsmToken::LBrac) && + "Token is not a Left Bracket"); + S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat left bracket token. + + const AsmToken &BaseRegTok = Parser.getTok(); + if (BaseRegTok.isNot(AsmToken::Identifier)) { + Error(BaseRegTok.getLoc(), "register expected"); + return 0; + } + int BaseRegNum = TryParseRegister(); + if (BaseRegNum == -1) { + Error(BaseRegTok.getLoc(), "register expected"); + return 0; + } bool Preindexed = false; bool Postindexed = false; @@ -222,245 +563,352 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { bool Negative = false; bool Writeback = false; - // First look for preindexed address forms: - // [Rn, +/-Rm] - // [Rn, #offset] - // [Rn, +/-Rm, shift] - // that is after the "[Rn" we now have see if the next token is a comma. - const AsmToken &Tok = getLexer().getTok(); + // First look for preindexed address forms, that is after the "[Rn" we now + // have to see if the next token is a comma. + const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Comma)) { Preindexed = true; - getLexer().Lex(); // Eat comma token. - - const AsmToken &NextTok = getLexer().getTok(); - if (NextTok.is(AsmToken::Plus)) - getLexer().Lex(); // Eat plus token. - else if (NextTok.is(AsmToken::Minus)) { - Negative = true; - getLexer().Lex(); // Eat minus token - } - - // See if there is a register following the "[Rn," we have so far. - const AsmToken &OffsetRegTok = getLexer().getTok(); - unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); - bool OffsetRegShifted = false; + Parser.Lex(); // Eat comma token. + int OffsetRegNum; + bool OffsetRegShifted; enum ShiftType ShiftType; const MCExpr *ShiftAmount; const MCExpr *Offset; - if (OffsetRegNum != 0) { - OffsetIsReg = true; - getLexer().Lex(); // Eat identifier token for the offset register. - // Look for a comma then a shift - const AsmToken &Tok = getLexer().getTok(); - if (Tok.is(AsmToken::Comma)) { - getLexer().Lex(); // Eat comma token. - - const AsmToken &Tok = getLexer().getTok(); - if (ParseShift(&ShiftType, ShiftAmount)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; - } - } - else { // "[Rn," we have so far was not followed by "Rm" - // Look for #offset following the "[Rn," - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. - - if (getParser().ParseExpression(Offset)) - return true; + if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, + Offset, OffsetIsReg, OffsetRegNum, E)) + return 0; + const AsmToken &RBracTok = Parser.getTok(); + if (RBracTok.isNot(AsmToken::RBrac)) { + Error(RBracTok.getLoc(), "']' expected"); + return 0; } - const AsmToken &RBracTok = getLexer().getTok(); - if (RBracTok.isNot(AsmToken::RBrac)) - return Error(RBracTok.getLoc(), "']' expected"); - getLexer().Lex(); // Eat right bracket token. + E = RBracTok.getLoc(); + Parser.Lex(); // Eat right bracket token. - const AsmToken &ExclaimTok = getLexer().getTok(); + const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { + E = ExclaimTok.getLoc(); Writeback = true; - getLexer().Lex(); // Eat exclaim token + Parser.Lex(); // Eat exclaim token } - Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, - OffsetRegShifted, ShiftType, ShiftAmount, - Preindexed, Postindexed, Negative, Writeback); - return false; + return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, + OffsetRegShifted, ShiftType, ShiftAmount, + Preindexed, Postindexed, Negative, Writeback, + S, E); } // The "[Rn" we have so far was not followed by a comma. else if (Tok.is(AsmToken::RBrac)) { - // This is a post indexing addressing forms: - // [Rn], #offset - // [Rn], +/-Rm - // [Rn], +/-Rm, shift - // that is a ']' follows after the "[Rn". - Postindexed = true; - Writeback = true; - getLexer().Lex(); // Eat right bracket token. - - const AsmToken &CommaTok = getLexer().getTok(); - if (CommaTok.isNot(AsmToken::Comma)) - return Error(CommaTok.getLoc(), "',' expected"); - getLexer().Lex(); // Eat comma token. - - const AsmToken &NextTok = getLexer().getTok(); - if (NextTok.is(AsmToken::Plus)) - getLexer().Lex(); // Eat plus token. - else if (NextTok.is(AsmToken::Minus)) { - Negative = true; - getLexer().Lex(); // Eat minus token - } + // If there's anything other than the right brace, this is a post indexing + // addressing form. + E = Tok.getLoc(); + Parser.Lex(); // Eat right bracket token. - // See if there is a register following the "[Rn]," we have so far. - const AsmToken &OffsetRegTok = getLexer().getTok(); - unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + int OffsetRegNum = 0; bool OffsetRegShifted = false; enum ShiftType ShiftType; const MCExpr *ShiftAmount; - const MCExpr *Offset; - if (OffsetRegNum != 0) { - OffsetIsReg = true; - getLexer().Lex(); // Eat identifier token for the offset register. - // Look for a comma then a shift - const AsmToken &Tok = getLexer().getTok(); - if (Tok.is(AsmToken::Comma)) { - getLexer().Lex(); // Eat comma token. - - const AsmToken &Tok = getLexer().getTok(); - if (ParseShift(&ShiftType, ShiftAmount)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; + const MCExpr *Offset = 0; + + const AsmToken &NextTok = Parser.getTok(); + if (NextTok.isNot(AsmToken::EndOfStatement)) { + Postindexed = true; + Writeback = true; + if (NextTok.isNot(AsmToken::Comma)) { + Error(NextTok.getLoc(), "',' expected"); + return 0; } + Parser.Lex(); // Eat comma token. + if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, + ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, + E)) + return 0; } - else { // "[Rn]," we have so far was not followed by "Rm" - // Look for #offset following the "[Rn]," - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. - - if (getParser().ParseExpression(Offset)) - return true; + + return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, + OffsetRegShifted, ShiftType, ShiftAmount, + Preindexed, Postindexed, Negative, Writeback, + S, E); + } + + return 0; +} + +/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," +/// we will parse the following (were +/- means that a plus or minus is +/// optional): +/// +/-Rm +/// +/-Rm, shift +/// #offset +/// we return false on success or an error otherwise. +bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, + bool &OffsetRegShifted, + enum ShiftType &ShiftType, + const MCExpr *&ShiftAmount, + const MCExpr *&Offset, + bool &OffsetIsReg, + int &OffsetRegNum, + SMLoc &E) { + Negative = false; + OffsetRegShifted = false; + OffsetIsReg = false; + OffsetRegNum = -1; + const AsmToken &NextTok = Parser.getTok(); + E = NextTok.getLoc(); + if (NextTok.is(AsmToken::Plus)) + Parser.Lex(); // Eat plus token. + else if (NextTok.is(AsmToken::Minus)) { + Negative = true; + Parser.Lex(); // Eat minus token + } + // See if there is a register following the "[Rn," or "[Rn]," we have so far. + const AsmToken &OffsetRegTok = Parser.getTok(); + if (OffsetRegTok.is(AsmToken::Identifier)) { + SMLoc CurLoc = OffsetRegTok.getLoc(); + OffsetRegNum = TryParseRegister(); + if (OffsetRegNum != -1) { + OffsetIsReg = true; + E = CurLoc; } - Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, - OffsetRegShifted, ShiftType, ShiftAmount, - Preindexed, Postindexed, Negative, Writeback); - return false; } - return true; + // If we parsed a register as the offset then there can be a shift after that. + if (OffsetRegNum != -1) { + // Look for a comma then a shift + const AsmToken &Tok = Parser.getTok(); + if (Tok.is(AsmToken::Comma)) { + Parser.Lex(); // Eat comma token. + + const AsmToken &Tok = Parser.getTok(); + if (ParseShift(ShiftType, ShiftAmount, E)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" + // Look for #offset following the "[Rn," or "[Rn]," + const AsmToken &HashTok = Parser.getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + + Parser.Lex(); // Eat hash token. + + if (getParser().ParseExpression(Offset)) + return true; + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + } + return false; } /// ParseShift as one of these two: /// ( lsl | lsr | asr | ror ) , # shift_amount /// rrx /// and returns true if it parses a shift otherwise it returns false. -bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *ShiftAmount) { - const AsmToken &Tok = getLexer().getTok(); +bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, + SMLoc &E) { + const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return true; - const StringRef &ShiftName = Tok.getString(); + StringRef ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL") - *St = Lsl; + St = Lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") - *St = Lsr; + St = Lsr; else if (ShiftName == "asr" || ShiftName == "ASR") - *St = Asr; + St = Asr; else if (ShiftName == "ror" || ShiftName == "ROR") - *St = Ror; + St = Ror; else if (ShiftName == "rrx" || ShiftName == "RRX") - *St = Rrx; + St = Rrx; else return true; - getLexer().Lex(); // Eat shift type token. + Parser.Lex(); // Eat shift type token. - // For all but a Rotate right there must be a '#' and a shift amount - if (*St != Rrx) { - // Look for # following the shift type - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. - - if (getParser().ParseExpression(ShiftAmount)) - return true; - } - - return false; -} + // Rrx stands alone. + if (St == Rrx) + return false; -// A hack to allow some testing -unsigned ARMAsmParser::MatchRegisterName(const StringRef &Name) { - if (Name == "r1") - return 1; - else if (Name == "r2") - return 2; - else if (Name == "r3") - return 3; - return 0; -} + // Otherwise, there must be a '#' and a shift amount. + const AsmToken &HashTok = Parser.getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + Parser.Lex(); // Eat hash token. -// A hack to allow some testing -bool ARMAsmParser::MatchInstruction(SmallVectorImpl &Operands, - MCInst &Inst) { - struct ARMOperand Op0 = Operands[0]; - assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); - const StringRef &Mnemonic = Op0.getToken(); - if (Mnemonic == "add" || - Mnemonic == "ldr") - return false; + if (getParser().ParseExpression(ShiftAmount)) + return true; - return true; + return false; } -// TODO - this is a work in progress -bool ARMAsmParser::ParseOperand(ARMOperand &Op) { +/// Parse a arm instruction operand. For now this parses the operand regardless +/// of the mnemonic. +ARMOperand *ARMAsmParser::ParseOperand() { + SMLoc S, E; switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return 0; case AsmToken::Identifier: - if (!ParseRegister(Op)) - return false; - // TODO parse other operands that start with an identifier - return true; + if (ARMOperand *Op = TryParseRegisterWithWriteBack()) + return Op; + + // This was not a register so parse other operands that start with an + // identifier (like labels) as expressions and create them as immediates. + const MCExpr *IdVal; + S = Parser.getTok().getLoc(); + if (getParser().ParseExpression(IdVal)) + return 0; + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + return ARMOperand::CreateImm(IdVal, S, E); case AsmToken::LBrac: - if (!ParseMemory(Op)) - return false; - default: - return true; + return ParseMemory(); + case AsmToken::LCurly: + return ParseRegisterList(); + case AsmToken::Hash: + // #42 -> immediate. + // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate + S = Parser.getTok().getLoc(); + Parser.Lex(); + const MCExpr *ImmVal; + if (getParser().ParseExpression(ImmVal)) + return 0; + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + return ARMOperand::CreateImm(ImmVal, S, E); } } -bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { - SmallVector Operands; +/// Parse an arm instruction mnemonic followed by its operands. +bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl &Operands) { + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + + // Determine the predicate, if any. + // + // FIXME: We need a way to check whether a prefix supports predication, + // otherwise we will end up with an ambiguity for instructions that happen to + // end with a predicate name. + // FIXME: Likewise, some arithmetic instructions have an 's' prefix which + // indicates to update the condition codes. Those instructions have an + // additional immediate operand which encodes the prefix as reg0 or CPSR. + // Just checking for a suffix of 's' definitely creates ambiguities; e.g, + // the SMMLS instruction. + unsigned CC = StringSwitch(Head.substr(Head.size()-2)) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); + + if (CC == ~0U || + (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) { + CC = ARMCC::AL; + } else { + Head = Head.slice(0, Head.size() - 2); + } - Operands.push_back(ARMOperand::CreateToken(Name)); + Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); + // FIXME: Should only add this operand for predicated instructions + Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc)); - SMLoc Loc = getLexer().getTok().getLoc(); - if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Add the remaining tokens in the mnemonic. + while (Next != StringRef::npos) { + Start = Next; + Next = Name.find('.', Start + 1); + Head = Name.slice(Start, Next); + + Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); + } + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - Operands.push_back(ARMOperand()); - if (ParseOperand(Operands.back())) + if (ARMOperand *Op = ParseOperand()) + Operands.push_back(Op); + else { + Parser.EatToEndOfStatement(); return true; + } while (getLexer().is(AsmToken::Comma)) { - getLexer().Lex(); // Eat the comma. + Parser.Lex(); // Eat the comma. // Parse and remember the operand. - Operands.push_back(ARMOperand()); - if (ParseOperand(Operands.back())) + if (ARMOperand *Op = ParseOperand()) + Operands.push_back(Op); + else { + Parser.EatToEndOfStatement(); return true; + } } } - if (!MatchInstruction(Operands, Inst)) + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Parser.EatToEndOfStatement(); + return TokError("unexpected token in argument list"); + } + + Parser.Lex(); // Consume the EndOfStatement + return false; +} + +bool ARMAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl &Operands, + MCStreamer &Out) { + MCInst Inst; + unsigned ErrorInfo; + switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { + case Match_Success: + Out.EmitInstruction(Inst); return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + } - Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented"); + llvm_unreachable("Implement any new match types added!"); return true; } +/// ParseDirective parses the arm specific directives bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return ParseDirectiveWord(4, DirectiveID.getLoc()); + else if (IDVal == ".thumb") + return ParseDirectiveThumb(DirectiveID.getLoc()); + else if (IDVal == ".thumb_func") + return ParseDirectiveThumbFunc(DirectiveID.getLoc()); + else if (IDVal == ".code") + return ParseDirectiveCode(DirectiveID.getLoc()); + else if (IDVal == ".syntax") + return ParseDirectiveSyntax(DirectiveID.getLoc()); return true; } @@ -473,24 +921,111 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().ParseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size); + getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); - getLexer().Lex(); + Parser.Lex(); } } - getLexer().Lex(); + Parser.Lex(); return false; } -// Force static initialization. +/// ParseDirectiveThumb +/// ::= .thumb +bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + // TODO: set thumb mode + // TODO: tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveThumbFunc +/// ::= .thumbfunc symbol_name +bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { + const AsmToken &Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) + return Error(L, "unexpected token in .thumb_func directive"); + StringRef Name = Tok.getString(); + Parser.Lex(); // Consume the identifier token. + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + // Mark symbol as a thumb symbol. + MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name); + getParser().getStreamer().EmitThumbFunc(Func); + return false; +} + +/// ParseDirectiveSyntax +/// ::= .syntax unified | divided +bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { + const AsmToken &Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return Error(L, "unexpected token in .syntax directive"); + StringRef Mode = Tok.getString(); + if (Mode == "unified" || Mode == "UNIFIED") + Parser.Lex(); + else if (Mode == "divided" || Mode == "DIVIDED") + Parser.Lex(); + else + return Error(L, "unrecognized syntax mode in .syntax directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(Parser.getTok().getLoc(), "unexpected token in directive"); + Parser.Lex(); + + // TODO tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveCode +/// ::= .code 16 | 32 +bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { + const AsmToken &Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Integer)) + return Error(L, "unexpected token in .code directive"); + int64_t Val = Parser.getTok().getIntVal(); + if (Val == 16) + Parser.Lex(); + else if (Val == 32) + Parser.Lex(); + else + return Error(L, "invalid operand to .code directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(Parser.getTok().getLoc(), "unexpected token in directive"); + Parser.Lex(); + + if (Val == 16) + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); + else + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); + + return false; +} + +extern "C" void LLVMInitializeARMAsmLexer(); + +/// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser X(TheARMTarget); RegisterAsmParser Y(TheThumbTarget); + LLVMInitializeARMAsmLexer(); } + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "ARMGenAsmMatcher.inc"