1 //===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "ARMSubtarget.h"
12 #include "llvm/MC/MCParser/MCAsmLexer.h"
13 #include "llvm/MC/MCParser/MCAsmParser.h"
14 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/Target/TargetRegistry.h"
19 #include "llvm/Target/TargetAsmParser.h"
20 #include "llvm/Support/Compiler.h"
21 #include "llvm/Support/SourceMgr.h"
22 #include "llvm/ADT/OwningPtr.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Twine.h"
30 // The shift types for register controlled shifts in arm memory addressing
39 class ARMAsmParser : public TargetAsmParser {
44 MCAsmParser &getParser() const { return Parser; }
46 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
48 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
50 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
52 bool MaybeParseRegister(OwningPtr<ARMOperand> &Op, bool ParseWriteBack);
54 bool ParseRegisterList(OwningPtr<ARMOperand> &Op);
56 bool ParseMemory(OwningPtr<ARMOperand> &Op);
58 bool ParseMemoryOffsetReg(bool &Negative,
59 bool &OffsetRegShifted,
60 enum ShiftType &ShiftType,
61 const MCExpr *&ShiftAmount,
62 const MCExpr *&Offset,
67 bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
69 bool ParseOperand(OwningPtr<ARMOperand> &Op);
71 bool ParseDirectiveWord(unsigned Size, SMLoc L);
73 bool ParseDirectiveThumb(SMLoc L);
75 bool ParseDirectiveThumbFunc(SMLoc L);
77 bool ParseDirectiveCode(SMLoc L);
79 bool ParseDirectiveSyntax(SMLoc L);
81 /// @name Auto-generated Match Functions
84 unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
86 bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
93 ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
94 : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
96 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
97 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
99 virtual bool ParseDirective(AsmToken DirectiveID);
102 /// ARMOperand - Instances of this class represent a parsed ARM machine
104 struct ARMOperand : public MCParsedAsmOperand {
116 SMLoc StartLoc, EndLoc;
120 ARMCC::CondCodes Val;
137 // This is for all forms of ARM address expressions
140 unsigned OffsetRegNum; // used when OffsetIsReg is true
141 const MCExpr *Offset; // used when OffsetIsReg is false
142 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
143 enum ShiftType ShiftType; // used when OffsetRegShifted is true
145 OffsetRegShifted : 1, // only used when OffsetIsReg is true
149 Negative : 1, // only used when OffsetIsReg is true
155 ARMOperand(KindTy K, SMLoc S, SMLoc E)
156 : Kind(K), StartLoc(S), EndLoc(E) {}
158 ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
160 StartLoc = o.StartLoc;
181 /// getStartLoc - Get the location of the first token of this operand.
182 SMLoc getStartLoc() const { return StartLoc; }
183 /// getEndLoc - Get the location of the last token of this operand.
184 SMLoc getEndLoc() const { return EndLoc; }
186 ARMCC::CondCodes getCondCode() const {
187 assert(Kind == CondCode && "Invalid access!");
191 StringRef getToken() const {
192 assert(Kind == Token && "Invalid access!");
193 return StringRef(Tok.Data, Tok.Length);
196 unsigned getReg() const {
197 assert(Kind == Register && "Invalid access!");
201 const MCExpr *getImm() const {
202 assert(Kind == Immediate && "Invalid access!");
206 bool isCondCode() const { return Kind == CondCode; }
208 bool isImm() const { return Kind == Immediate; }
210 bool isReg() const { return Kind == Register; }
212 bool isToken() const {return Kind == Token; }
214 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
215 // Add as immediates when possible.
216 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
217 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
219 Inst.addOperand(MCOperand::CreateExpr(Expr));
222 void addCondCodeOperands(MCInst &Inst, unsigned N) const {
223 assert(N == 1 && "Invalid number of operands!");
224 Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
227 void addRegOperands(MCInst &Inst, unsigned N) const {
228 assert(N == 1 && "Invalid number of operands!");
229 Inst.addOperand(MCOperand::CreateReg(getReg()));
232 void addImmOperands(MCInst &Inst, unsigned N) const {
233 assert(N == 1 && "Invalid number of operands!");
234 addExpr(Inst, getImm());
237 static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
239 Op.reset(new ARMOperand);
241 Op->Tok.Data = Str.data();
242 Op->Tok.Length = Str.size();
247 static void CreateReg(OwningPtr<ARMOperand> &Op, unsigned RegNum,
248 bool Writeback, SMLoc S, SMLoc E) {
249 Op.reset(new ARMOperand);
251 Op->Reg.RegNum = RegNum;
252 Op->Reg.Writeback = Writeback;
258 static void CreateImm(OwningPtr<ARMOperand> &Op, const MCExpr *Val,
260 Op.reset(new ARMOperand);
261 Op->Kind = Immediate;
268 static void CreateMem(OwningPtr<ARMOperand> &Op,
269 unsigned BaseRegNum, bool OffsetIsReg,
270 const MCExpr *Offset, unsigned OffsetRegNum,
271 bool OffsetRegShifted, enum ShiftType ShiftType,
272 const MCExpr *ShiftAmount, bool Preindexed,
273 bool Postindexed, bool Negative, bool Writeback,
275 Op.reset(new ARMOperand);
277 Op->Mem.BaseRegNum = BaseRegNum;
278 Op->Mem.OffsetIsReg = OffsetIsReg;
279 Op->Mem.Offset = Offset;
280 Op->Mem.OffsetRegNum = OffsetRegNum;
281 Op->Mem.OffsetRegShifted = OffsetRegShifted;
282 Op->Mem.ShiftType = ShiftType;
283 Op->Mem.ShiftAmount = ShiftAmount;
284 Op->Mem.Preindexed = Preindexed;
285 Op->Mem.Postindexed = Postindexed;
286 Op->Mem.Negative = Negative;
287 Op->Mem.Writeback = Writeback;
294 } // end anonymous namespace.
297 /// @name Auto-generated Match Functions
300 static unsigned MatchRegisterName(StringRef Name);
304 /// Try to parse a register name. The token must be an Identifier when called,
305 /// and if it is a register name a Reg operand is created, the token is eaten
306 /// and false is returned. Else true is returned and no token is eaten.
307 /// TODO this is likely to change to allow different register types and/or to
308 /// parse for a specific register type.
309 bool ARMAsmParser::MaybeParseRegister
310 (OwningPtr<ARMOperand> &Op, bool ParseWriteBack) {
312 const AsmToken &Tok = Parser.getTok();
313 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
315 // FIXME: Validate register for the current architecture; we have to do
316 // validation later, so maybe there is no need for this here.
319 RegNum = MatchRegisterName(Tok.getString());
325 Parser.Lex(); // Eat identifier token.
327 E = Parser.getTok().getLoc();
329 bool Writeback = false;
330 if (ParseWriteBack) {
331 const AsmToken &ExclaimTok = Parser.getTok();
332 if (ExclaimTok.is(AsmToken::Exclaim)) {
333 E = ExclaimTok.getLoc();
335 Parser.Lex(); // Eat exclaim token
339 ARMOperand::CreateReg(Op, RegNum, Writeback, S, E);
344 /// Parse a register list, return false if successful else return true or an
345 /// error. The first token must be a '{' when called.
/// Duplicated or out-of-order registers are only diagnosed with warnings; they
/// do not make the parse fail.
346 bool ARMAsmParser::ParseRegisterList(OwningPtr<ARMOperand> &Op) {
348 assert(Parser.getTok().is(AsmToken::LCurly) &&
349 "Token is not an Left Curly Brace");
350 S = Parser.getTok().getLoc();
351 Parser.Lex(); // Eat left curly brace token.
// The first register in the list is mandatory.
353 const AsmToken &RegTok = Parser.getTok();
354 SMLoc RegLoc = RegTok.getLoc();
355 if (RegTok.isNot(AsmToken::Identifier))
356 return Error(RegLoc, "register expected");
357 int RegNum = MatchRegisterName(RegTok.getString());
359 return Error(RegLoc, "register expected");
360 Parser.Lex(); // Eat identifier token.
// RegList is a bitmask: bit RegNum is set for every register seen so far.
361 unsigned RegList = 1 << RegNum;
363 int HighRegNum = RegNum;
364 // TODO ranges like "{Rn-Rm}"
// Each further register must be introduced by a comma.
365 while (Parser.getTok().is(AsmToken::Comma)) {
366 Parser.Lex(); // Eat comma token.
368 const AsmToken &RegTok = Parser.getTok();
369 SMLoc RegLoc = RegTok.getLoc();
370 if (RegTok.isNot(AsmToken::Identifier))
371 return Error(RegLoc, "register expected");
372 int RegNum = MatchRegisterName(RegTok.getString());
374 return Error(RegLoc, "register expected");
// Warn (but keep parsing) on a repeated register or one that does not
// exceed the previous high-water mark.
376 if (RegList & (1 << RegNum))
377 Warning(RegLoc, "register duplicated in register list");
378 else if (RegNum <= HighRegNum)
379 Warning(RegLoc, "register not in ascending order in register list");
380 RegList |= 1 << RegNum;
383 Parser.Lex(); // Eat identifier token.
// The list must be terminated by a '}'.
385 const AsmToken &RCurlyTok = Parser.getTok();
386 if (RCurlyTok.isNot(AsmToken::RCurly))
387 return Error(RCurlyTok.getLoc(), "'}' expected");
388 E = RCurlyTok.getLoc();
389 Parser.Lex(); // Eat right curly brace token.
394 /// Parse an arm memory expression, return false if successful else return true
395 /// or an error. The first token must be a '[' when called.
396 /// TODO Only preindexing and postindexing addressing are started, unindexed
397 /// with option, etc are still to do.
398 bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) {
400 assert(Parser.getTok().is(AsmToken::LBrac) &&
401 "Token is not an Left Bracket");
402 S = Parser.getTok().getLoc();
403 Parser.Lex(); // Eat left bracket token.
405 const AsmToken &BaseRegTok = Parser.getTok();
406 if (BaseRegTok.isNot(AsmToken::Identifier))
407 return Error(BaseRegTok.getLoc(), "register expected");
408 if (MaybeParseRegister(Op, false))
409 return Error(BaseRegTok.getLoc(), "register expected");
410 int BaseRegNum = Op->getReg();
412 bool Preindexed = false;
413 bool Postindexed = false;
414 bool OffsetIsReg = false;
415 bool Negative = false;
416 bool Writeback = false;
418 // First look for preindexed address forms, that is after the "[Rn" we now
419 // have to see if the next token is a comma.
420 const AsmToken &Tok = Parser.getTok();
421 if (Tok.is(AsmToken::Comma)) {
423 Parser.Lex(); // Eat comma token.
425 bool OffsetRegShifted;
426 enum ShiftType ShiftType;
427 const MCExpr *ShiftAmount;
428 const MCExpr *Offset;
429 if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
430 Offset, OffsetIsReg, OffsetRegNum, E))
432 const AsmToken &RBracTok = Parser.getTok();
433 if (RBracTok.isNot(AsmToken::RBrac))
434 return Error(RBracTok.getLoc(), "']' expected");
435 E = RBracTok.getLoc();
436 Parser.Lex(); // Eat right bracket token.
438 const AsmToken &ExclaimTok = Parser.getTok();
439 if (ExclaimTok.is(AsmToken::Exclaim)) {
440 E = ExclaimTok.getLoc();
442 Parser.Lex(); // Eat exclaim token
444 ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
445 OffsetRegShifted, ShiftType, ShiftAmount,
446 Preindexed, Postindexed, Negative, Writeback, S, E);
449 // The "[Rn" we have so far was not followed by a comma.
450 else if (Tok.is(AsmToken::RBrac)) {
451 // This is a post-indexed addressing form, that is, a ']' follows after
456 Parser.Lex(); // Eat right bracket token.
458 int OffsetRegNum = 0;
459 bool OffsetRegShifted = false;
460 enum ShiftType ShiftType;
461 const MCExpr *ShiftAmount;
462 const MCExpr *Offset;
464 const AsmToken &NextTok = Parser.getTok();
465 if (NextTok.isNot(AsmToken::EndOfStatement)) {
466 if (NextTok.isNot(AsmToken::Comma))
467 return Error(NextTok.getLoc(), "',' expected");
468 Parser.Lex(); // Eat comma token.
469 if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
470 ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
475 ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
476 OffsetRegShifted, ShiftType, ShiftAmount,
477 Preindexed, Postindexed, Negative, Writeback, S, E);
484 /// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
485 /// we will parse the following (where +/- means that a plus or minus is
490 /// we return false on success or an error otherwise.
491 bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
492 bool &OffsetRegShifted,
493 enum ShiftType &ShiftType,
494 const MCExpr *&ShiftAmount,
495 const MCExpr *&Offset,
499 OwningPtr<ARMOperand> Op;
501 OffsetRegShifted = false;
// An optional leading '+' or '-' sign is consumed first.
504 const AsmToken &NextTok = Parser.getTok();
505 E = NextTok.getLoc();
506 if (NextTok.is(AsmToken::Plus))
507 Parser.Lex(); // Eat plus token.
508 else if (NextTok.is(AsmToken::Minus)) {
510 Parser.Lex(); // Eat minus token
512 // See if there is a register following the "[Rn," or "[Rn]," we have so far.
513 const AsmToken &OffsetRegTok = Parser.getTok();
514 if (OffsetRegTok.is(AsmToken::Identifier)) {
// MaybeParseRegister returns false when it parsed a register, hence the
// negation.
515 OffsetIsReg = !MaybeParseRegister(Op, false);
518 OffsetRegNum = Op->getReg();
521 // If we parsed a register as the offset then there can be a shift after that
522 if (OffsetRegNum != -1) {
523 // Look for a comma then a shift
524 const AsmToken &Tok = Parser.getTok();
525 if (Tok.is(AsmToken::Comma)) {
526 Parser.Lex(); // Eat comma token.
528 const AsmToken &Tok = Parser.getTok();
// A true result from ParseShift means no valid shift was parsed.
529 if (ParseShift(ShiftType, ShiftAmount, E))
530 return Error(Tok.getLoc(), "shift expected");
531 OffsetRegShifted = true;
534 else { // the "[Rn," or "[Rn]," we have so far was not followed by "Rm"
535 // Look for #offset following the "[Rn," or "[Rn],"
536 const AsmToken &HashTok = Parser.getTok();
537 if (HashTok.isNot(AsmToken::Hash))
538 return Error(HashTok.getLoc(), "'#' expected");
540 Parser.Lex(); // Eat hash token.
542 if (getParser().ParseExpression(Offset))
// E is placed one character before the start of the current token.
544 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
549 /// ParseShift as one of these two:
550 /// ( lsl | lsr | asr | ror ) , # shift_amount
552 /// and returns false if it parses a shift, otherwise it returns true.
/// (The caller reports "shift expected" when this function returns true.)
553 bool ARMAsmParser::ParseShift(ShiftType &St,
554 const MCExpr *&ShiftAmount,
556 const AsmToken &Tok = Parser.getTok();
557 if (Tok.isNot(AsmToken::Identifier))
559 StringRef ShiftName = Tok.getString();
// Only the all-lowercase and all-uppercase spellings of each shift mnemonic
// are compared against.
560 if (ShiftName == "lsl" || ShiftName == "LSL")
562 else if (ShiftName == "lsr" || ShiftName == "LSR")
564 else if (ShiftName == "asr" || ShiftName == "ASR")
566 else if (ShiftName == "ror" || ShiftName == "ROR")
568 else if (ShiftName == "rrx" || ShiftName == "RRX")
572 Parser.Lex(); // Eat shift type token.
578 // Otherwise, there must be a '#' and a shift amount.
579 const AsmToken &HashTok = Parser.getTok();
580 if (HashTok.isNot(AsmToken::Hash))
581 return Error(HashTok.getLoc(), "'#' expected");
582 Parser.Lex(); // Eat hash token.
// The shift amount is parsed as a general expression into ShiftAmount.
584 if (getParser().ParseExpression(ShiftAmount))
590 /// Parse an ARM instruction operand. For now this parses the operand regardless
592 bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
595 switch (getLexer().getKind()) {
596 case AsmToken::Identifier:
597 if (!MaybeParseRegister(Op, true))
599 // This was not a register so parse other operands that start with an
600 // identifier (like labels) as expressions and create them as immediates.
602 S = Parser.getTok().getLoc();
603 if (getParser().ParseExpression(IdVal))
605 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
606 ARMOperand::CreateImm(Op, IdVal, S, E);
608 case AsmToken::LBrac:
609 return ParseMemory(Op);
610 case AsmToken::LCurly:
611 return ParseRegisterList(Op);
614 // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
615 S = Parser.getTok().getLoc();
617 const MCExpr *ImmVal;
618 if (getParser().ParseExpression(ImmVal))
620 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
621 ARMOperand::CreateImm(Op, ImmVal, S, E);
624 return Error(Parser.getTok().getLoc(), "unexpected token in operand");
628 /// Parse an arm instruction mnemonic followed by its operands.
629 bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
630 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
631 OwningPtr<ARMOperand> Op;
632 ARMOperand::CreateToken(Op, Name, NameLoc);
634 Operands.push_back(Op.take());
636 if (getLexer().isNot(AsmToken::EndOfStatement)) {
638 // Read the first operand.
639 OwningPtr<ARMOperand> Op;
640 if (ParseOperand(Op)) return true;
641 Operands.push_back(Op.take());
643 while (getLexer().is(AsmToken::Comma)) {
644 Parser.Lex(); // Eat the comma.
646 // Parse and remember the operand.
647 if (ParseOperand(Op)) return true;
648 Operands.push_back(Op.take());
654 /// ParseDirective parses the arm specific directives
655 bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
656 StringRef IDVal = DirectiveID.getIdentifier();
657 if (IDVal == ".word")
658 return ParseDirectiveWord(4, DirectiveID.getLoc());
659 else if (IDVal == ".thumb")
660 return ParseDirectiveThumb(DirectiveID.getLoc());
661 else if (IDVal == ".thumb_func")
662 return ParseDirectiveThumbFunc(DirectiveID.getLoc());
663 else if (IDVal == ".code")
664 return ParseDirectiveCode(DirectiveID.getLoc());
665 else if (IDVal == ".syntax")
666 return ParseDirectiveSyntax(DirectiveID.getLoc());
670 /// ParseDirectiveWord
671 /// ::= .word [ expression (, expression)* ]
672 bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
673 if (getLexer().isNot(AsmToken::EndOfStatement)) {
676 if (getParser().ParseExpression(Value))
679 getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
681 if (getLexer().is(AsmToken::EndOfStatement))
684 // FIXME: Improve diagnostic.
685 if (getLexer().isNot(AsmToken::Comma))
686 return Error(L, "unexpected token in directive");
695 /// ParseDirectiveThumb
697 bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
698 if (getLexer().isNot(AsmToken::EndOfStatement))
699 return Error(L, "unexpected token in directive");
702 // TODO: set thumb mode
703 // TODO: tell the MC streamer the mode
704 // getParser().getStreamer().Emit???();
708 /// ParseDirectiveThumbFunc
709 /// ::= .thumb_func symbol_name
/// Expects an identifier or string token naming the symbol, followed by the
/// end of the statement. Any other token is reported as an error at the
/// directive's location L.
710 bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
711 const AsmToken &Tok = Parser.getTok();
712 if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
// Name the directive actually being parsed in the diagnostic.
713 return Error(L, "unexpected token in .thumb_func directive");
// The symbol name is read but not yet used (see the TODO below).
714 StringRef ATTRIBUTE_UNUSED SymbolName = Parser.getTok().getIdentifier();
715 Parser.Lex(); // Consume the identifier token.
// Nothing may follow the symbol name on the same statement.
717 if (getLexer().isNot(AsmToken::EndOfStatement))
718 return Error(L, "unexpected token in directive");
721 // TODO: mark symbol as a thumb symbol
722 // getParser().getStreamer().Emit???();
726 /// ParseDirectiveSyntax
727 /// ::= .syntax unified | divided
728 bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
729 const AsmToken &Tok = Parser.getTok();
730 if (Tok.isNot(AsmToken::Identifier))
731 return Error(L, "unexpected token in .syntax directive");
732 StringRef Mode = Tok.getString();
733 if (Mode == "unified" || Mode == "UNIFIED")
735 else if (Mode == "divided" || Mode == "DIVIDED")
738 return Error(L, "unrecognized syntax mode in .syntax directive");
740 if (getLexer().isNot(AsmToken::EndOfStatement))
741 return Error(Parser.getTok().getLoc(), "unexpected token in directive");
744 // TODO tell the MC streamer the mode
745 // getParser().getStreamer().Emit???();
749 /// ParseDirectiveCode
750 /// ::= .code 16 | 32
751 bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
752 const AsmToken &Tok = Parser.getTok();
753 if (Tok.isNot(AsmToken::Integer))
754 return Error(L, "unexpected token in .code directive");
755 int64_t Val = Parser.getTok().getIntVal();
761 return Error(L, "invalid operand to .code directive");
763 if (getLexer().isNot(AsmToken::EndOfStatement))
764 return Error(Parser.getTok().getLoc(), "unexpected token in directive");
767 // TODO tell the MC streamer the mode
768 // getParser().getStreamer().Emit???();
772 extern "C" void LLVMInitializeARMAsmLexer();
774 /// Force static initialization.
775 extern "C" void LLVMInitializeARMAsmParser() {
776 RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
777 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
778 LLVMInitializeARMAsmLexer();
781 #include "ARMGenAsmMatcher.inc"