1 //===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "llvm/ADT/SmallVector.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/MC/MCAsmLexer.h"
14 #include "llvm/MC/MCAsmParser.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/Support/SourceMgr.h"
19 #include "llvm/Target/TargetRegistry.h"
20 #include "llvm/Target/TargetAsmParser.h"
26 // The shift types for register controlled shifts in arm memory addressing
// ARMAsmParser - ARM implementation of TargetAsmParser. It keeps a reference
// to the generic MCAsmParser it was constructed with and declares the
// operand, instruction, and directive parsing routines that are defined later
// in this file.
35 class ARMAsmParser : public TargetAsmParser {
// Accessors for the generic parser and for its lexer.
39 MCAsmParser &getParser() const { return Parser; }
41 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Diagnostics are forwarded to the generic parser.
43 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
45 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
// Operand and directive parsing helpers.
47 bool ParseRegister(ARMOperand &Op);
49 bool ParseRegisterList(ARMOperand &Op);
51 bool ParseMemory(ARMOperand &Op);
53 bool ParseShift(enum ShiftType *St, const MCExpr *&ShiftAmount);
55 bool ParseOperand(ARMOperand &Op);
57 bool ParseDirectiveWord(unsigned Size, SMLoc L);
59 // TODO: For now, hand-written ("hacked") versions of the next two functions
60 // live in this file to allow some parser testing until the TableGen-generated
// versions are implemented.
62 /// @name Auto-generated Match Functions
64 bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
67 /// MatchRegisterName - Match the given string to a register name and return
68 /// its register number, or -1 if there is no match. To allow return values
69 /// to be used directly in register lists, arm registers have values between
71 int MatchRegisterName(const StringRef &Name);
// Stores a reference to the generic parser; the target is passed on to the
// TargetAsmParser base class.
77 ARMAsmParser(const Target &T, MCAsmParser &_Parser)
78 : TargetAsmParser(T), Parser(_Parser) {}
// Virtual entry points for parsing an instruction and a directive.
80 virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
82 virtual bool ParseDirective(AsmToken DirectiveID);
85 } // end anonymous namespace
89 /// ARMOperand - Instances of this class represent a parsed ARM machine
110 // Memory operand fields; these cover all forms of ARM address expressions.
114 const MCExpr *Offset; // used when OffsetIsReg is false
115 unsigned OffsetRegNum; // used when OffsetIsReg is true
116 bool OffsetRegShifted; // only used when OffsetIsReg is true
117 enum ShiftType ShiftType; // used when OffsetRegShifted is true
118 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
121 bool Negative; // only used when OffsetIsReg is true
// Returns the token text; asserts that this operand is a Token.
127 StringRef getToken() const {
128 assert(Kind == Token && "Invalid access!");
129 return StringRef(Tok.Data, Tok.Length);
// Register accessor; asserts that this operand is a Register.
132 unsigned getReg() const {
133 assert(Kind == Register && "Invalid access!");
// Kind predicates.
137 bool isToken() const {return Kind == Token; }
139 bool isReg() const { return Kind == Register; }
// Appends this operand's register to Inst as a register MCOperand.
// N must be 1 (asserted).
141 void addRegOperands(MCInst &Inst, unsigned N) const {
142 assert(N == 1 && "Invalid number of operands!");
143 Inst.addOperand(MCOperand::CreateReg(getReg()));
// Builds a Token operand that records Str's data pointer and length; the
// characters themselves are not copied here.
146 static ARMOperand CreateToken(StringRef Str) {
149 Res.Tok.Data = Str.data();
150 Res.Tok.Length = Str.size();
// Builds a Register operand from a register number and a writeback flag.
154 static ARMOperand CreateReg(unsigned RegNum, bool Writeback) {
157 Res.Reg.RegNum = RegNum;
158 Res.Reg.Writeback = Writeback;
// Builds a memory operand by storing each argument in the Mem field of the
// same name.
162 static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
163 const MCExpr *Offset, unsigned OffsetRegNum,
164 bool OffsetRegShifted, enum ShiftType ShiftType,
165 const MCExpr *ShiftAmount, bool Preindexed,
166 bool Postindexed, bool Negative, bool Writeback) {
169 Res.Mem.BaseRegNum = BaseRegNum;
170 Res.Mem.OffsetIsReg = OffsetIsReg;
171 Res.Mem.Offset = Offset;
172 Res.Mem.OffsetRegNum = OffsetRegNum;
173 Res.Mem.OffsetRegShifted = OffsetRegShifted;
174 Res.Mem.ShiftType = ShiftType;
175 Res.Mem.ShiftAmount = ShiftAmount;
176 Res.Mem.Preindexed = Preindexed;
177 Res.Mem.Postindexed = Postindexed;
178 Res.Mem.Negative = Negative;
179 Res.Mem.Writeback = Writeback;
184 } // end anonymous namespace.
186 // Try to parse a register name. The token must be an Identifier when called.
187 // If it is a register name, the identifier (and a '!' directly after it, if
188 // present) is eaten, a Reg operand is stored in Op, and false is returned.
// Else true is returned and no token is eaten.
189 // TODO: this is likely to change to allow different register types and/or to
190 // parse for a specific register type.
191 bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
192 const AsmToken &Tok = getLexer().getTok();
193 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
195 // FIXME: Validate register for the current architecture; we have to do
196 // validation later, so maybe there is no need for this here.
199 RegNum = MatchRegisterName(Tok.getString());
202 getLexer().Lex(); // Eat identifier token.
// An exclamation mark right after the register is consumed as part of this
// operand.
204 bool Writeback = false;
205 const AsmToken &ExclaimTok = getLexer().getTok();
206 if (ExclaimTok.is(AsmToken::Exclaim)) {
208 getLexer().Lex(); // Eat exclaim token
211 Op = ARMOperand::CreateReg(RegNum, Writeback);
216 // Try to parse a register list. The first token must be a '{' when called
218 bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {
219 const AsmToken &LCurlyTok = getLexer().getTok();
220 assert(LCurlyTok.is(AsmToken::LCurly) && "Token is not an Left Curly Brace");
221 getLexer().Lex(); // Eat left curly brace token.
// The list must contain at least one register.
223 const AsmToken &RegTok = getLexer().getTok();
224 SMLoc RegLoc = RegTok.getLoc();
225 if (RegTok.isNot(AsmToken::Identifier))
226 return Error(RegLoc, "register expected");
227 int RegNum = MatchRegisterName(RegTok.getString());
229 return Error(RegLoc, "register expected");
230 getLexer().Lex(); // Eat identifier token.
// RegList is a bitmask: bit N is set once register number N has been seen.
231 unsigned RegList = 1 << RegNum;
233 int HighRegNum = RegNum;
234 // TODO ranges like "{Rn-Rm}"
// Each further register must be introduced by a comma.
235 while (getLexer().getTok().is(AsmToken::Comma)) {
236 getLexer().Lex(); // Eat comma token.
238 const AsmToken &RegTok = getLexer().getTok();
239 SMLoc RegLoc = RegTok.getLoc();
240 if (RegTok.isNot(AsmToken::Identifier))
241 return Error(RegLoc, "register expected");
242 int RegNum = MatchRegisterName(RegTok.getString());
244 return Error(RegLoc, "register expected");
// Duplicated and out-of-order registers only produce warnings; the register
// is added to the mask either way.
246 if (RegList & (1 << RegNum))
247 Warning(RegLoc, "register duplicated in register list");
248 else if (RegNum <= HighRegNum)
249 Warning(RegLoc, "register not in ascending order in register list");
250 RegList |= 1 << RegNum;
253 getLexer().Lex(); // Eat identifier token.
// The list must be closed by a '}'.
255 const AsmToken &RCurlyTok = getLexer().getTok();
256 if (RCurlyTok.isNot(AsmToken::RCurly))
257 return Error(RCurlyTok.getLoc(), "'}' expected");
258 getLexer().Lex(); // Eat right curly brace token.
263 // Try to parse an ARM memory expression. It must start with a '[' token.
// On success the parsed pieces are stored in Op via ARMOperand::CreateMem.
264 // TODO: Only preindexed and postindexed addressing have been started;
265 // unindexed with option, etc. are still to do.
266 bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
267 const AsmToken &LBracTok = getLexer().getTok();
268 assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket");
269 getLexer().Lex(); // Eat left bracket token.
// The base register "Rn" is mandatory.
271 const AsmToken &BaseRegTok = getLexer().getTok();
272 if (BaseRegTok.isNot(AsmToken::Identifier))
273 return Error(BaseRegTok.getLoc(), "register expected");
274 int BaseRegNum = MatchRegisterName(BaseRegTok.getString());
275 if (BaseRegNum == -1)
276 return Error(BaseRegTok.getLoc(), "register expected");
277 getLexer().Lex(); // Eat identifier token.
// Flags describing the addressing form; all start out false.
279 bool Preindexed = false;
280 bool Postindexed = false;
281 bool OffsetIsReg = false;
282 bool Negative = false;
283 bool Writeback = false;
285 // First look for preindexed address forms:
288 // [Rn, +/-Rm, shift]
289 // That is, after the "[Rn" we have so far, see if the next token is a comma.
290 const AsmToken &Tok = getLexer().getTok();
291 if (Tok.is(AsmToken::Comma)) {
293 getLexer().Lex(); // Eat comma token.
// An optional sign may precede the offset register.
295 const AsmToken &NextTok = getLexer().getTok();
296 if (NextTok.is(AsmToken::Plus))
297 getLexer().Lex(); // Eat plus token.
298 else if (NextTok.is(AsmToken::Minus)) {
300 getLexer().Lex(); // Eat minus token
303 // See if there is a register following the "[Rn," we have so far.
// The token kind is not checked first; its text is simply matched against the
// register names.
304 const AsmToken &OffsetRegTok = getLexer().getTok();
305 int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
306 bool OffsetRegShifted = false;
// NOTE(review): ShiftType, ShiftAmount, and Offset are declared without
// initializers, yet all three are passed to CreateMem below regardless of
// which branch ran - confirm every path assigns them, or initialize them here.
307 enum ShiftType ShiftType;
308 const MCExpr *ShiftAmount;
309 const MCExpr *Offset;
310 if (OffsetRegNum != -1) {
312 getLexer().Lex(); // Eat identifier token for the offset register.
313 // Look for a comma then a shift
314 const AsmToken &Tok = getLexer().getTok();
315 if (Tok.is(AsmToken::Comma)) {
316 getLexer().Lex(); // Eat comma token.
// ParseShift returning true is reported as an error at the shift token.
318 const AsmToken &Tok = getLexer().getTok();
319 if (ParseShift(&ShiftType, ShiftAmount))
320 return Error(Tok.getLoc(), "shift expected");
321 OffsetRegShifted = true;
324 else { // "[Rn," we have so far was not followed by "Rm"
325 // Look for #offset following the "[Rn,"
326 const AsmToken &HashTok = getLexer().getTok();
327 if (HashTok.isNot(AsmToken::Hash))
328 return Error(HashTok.getLoc(), "'#' expected");
329 getLexer().Lex(); // Eat hash token.
331 if (getParser().ParseExpression(Offset))
// The preindexed forms must be closed by a ']'.
334 const AsmToken &RBracTok = getLexer().getTok();
335 if (RBracTok.isNot(AsmToken::RBrac))
336 return Error(RBracTok.getLoc(), "']' expected");
337 getLexer().Lex(); // Eat right bracket token.
// A '!' after the closing bracket is consumed as part of the operand.
339 const AsmToken &ExclaimTok = getLexer().getTok();
340 if (ExclaimTok.is(AsmToken::Exclaim)) {
342 getLexer().Lex(); // Eat exclaim token
344 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
345 OffsetRegShifted, ShiftType, ShiftAmount,
346 Preindexed, Postindexed, Negative, Writeback);
349 // The "[Rn" we have so far was not followed by a comma.
350 else if (Tok.is(AsmToken::RBrac)) {
351 // These are the post-indexed addressing forms:
354 // [Rn], +/-Rm, shift
355 // That is, a ']' follows the "[Rn".
358 getLexer().Lex(); // Eat right bracket token.
// In the post-indexed forms a comma after the ']' is mandatory.
360 const AsmToken &CommaTok = getLexer().getTok();
361 if (CommaTok.isNot(AsmToken::Comma))
362 return Error(CommaTok.getLoc(), "',' expected");
363 getLexer().Lex(); // Eat comma token.
// An optional sign may precede the offset register.
365 const AsmToken &NextTok = getLexer().getTok();
366 if (NextTok.is(AsmToken::Plus))
367 getLexer().Lex(); // Eat plus token.
368 else if (NextTok.is(AsmToken::Minus)) {
370 getLexer().Lex(); // Eat minus token
373 // See if there is a register following the "[Rn]," we have so far.
// As in the preindexed case, the token kind is not checked before matching.
374 const AsmToken &OffsetRegTok = getLexer().getTok();
375 int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
376 bool OffsetRegShifted = false;
// NOTE(review): same concern as in the preindexed branch - these three locals
// have no initializers but are all passed to CreateMem below.
377 enum ShiftType ShiftType;
378 const MCExpr *ShiftAmount;
379 const MCExpr *Offset;
380 if (OffsetRegNum != -1) {
382 getLexer().Lex(); // Eat identifier token for the offset register.
383 // Look for a comma then a shift
384 const AsmToken &Tok = getLexer().getTok();
385 if (Tok.is(AsmToken::Comma)) {
386 getLexer().Lex(); // Eat comma token.
388 const AsmToken &Tok = getLexer().getTok();
389 if (ParseShift(&ShiftType, ShiftAmount))
390 return Error(Tok.getLoc(), "shift expected");
391 OffsetRegShifted = true;
394 else { // "[Rn]," we have so far was not followed by "Rm"
395 // Look for #offset following the "[Rn],"
396 const AsmToken &HashTok = getLexer().getTok();
397 if (HashTok.isNot(AsmToken::Hash))
398 return Error(HashTok.getLoc(), "'#' expected");
399 getLexer().Lex(); // Eat hash token.
401 if (getParser().ParseExpression(Offset))
404 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
405 OffsetRegShifted, ShiftType, ShiftAmount,
406 Preindexed, Postindexed, Negative, Writeback);
413 /// ParseShift as one of these two:
414 /// ( lsl | lsr | asr | ror ) , # shift_amount
416 /// and returns false if it parses a shift; otherwise it returns true.
/// (ParseMemory reports a true result as the "shift expected" error.)
/// The shift name is matched only in all-lowercase or all-uppercase spelling.
/// NOTE(review): the visible code looks for '#' right after the shift name and
/// does not consume the ',' shown in the first form - confirm the grammar.
417 bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) {
418 const AsmToken &Tok = getLexer().getTok();
419 if (Tok.isNot(AsmToken::Identifier))
421 const StringRef &ShiftName = Tok.getString();
422 if (ShiftName == "lsl" || ShiftName == "LSL")
424 else if (ShiftName == "lsr" || ShiftName == "LSR")
426 else if (ShiftName == "asr" || ShiftName == "ASR")
428 else if (ShiftName == "ror" || ShiftName == "ROR")
430 else if (ShiftName == "rrx" || ShiftName == "RRX")
434 getLexer().Lex(); // Eat shift type token.
436 // For all but a rotate right with extend (presumably "rrx"; "ror" takes an
// amount per the grammar above) there must be a '#' and a shift amount
438 // Look for # following the shift type
439 const AsmToken &HashTok = getLexer().getTok();
440 if (HashTok.isNot(AsmToken::Hash))
441 return Error(HashTok.getLoc(), "'#' expected");
442 getLexer().Lex(); // Eat hash token.
444 if (getParser().ParseExpression(ShiftAmount))
451 // A hack to allow some testing: register names are matched with a
// hand-written chain of string comparisons. "rN" and "RN" spellings are
// accepted for r0-r15; the aliases fp, ip, sp, lr, and pc are accepted in
// lowercase only.
// NOTE(review): the second "r3"/"R3" test repeats the condition of the branch
// directly before it in the same else-if chain, so it can never be taken; it
// looks like a leftover and is a candidate for removal.
452 int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
453 if (Name == "r0" || Name == "R0")
455 else if (Name == "r1" || Name == "R1")
457 else if (Name == "r2" || Name == "R2")
459 else if (Name == "r3" || Name == "R3")
461 else if (Name == "r3" || Name == "R3")
463 else if (Name == "r4" || Name == "R4")
465 else if (Name == "r5" || Name == "R5")
467 else if (Name == "r6" || Name == "R6")
469 else if (Name == "r7" || Name == "R7")
471 else if (Name == "r8" || Name == "R8")
473 else if (Name == "r9" || Name == "R9")
475 else if (Name == "r10" || Name == "R10")
477 else if (Name == "r11" || Name == "R11" || Name == "fp")
479 else if (Name == "r12" || Name == "R12" || Name == "ip")
481 else if (Name == "r13" || Name == "R13" || Name == "sp")
483 else if (Name == "r14" || Name == "R14" || Name == "lr")
485 else if (Name == "r15" || Name == "R15" || Name == "pc")
490 // A hack to allow some testing: the mnemonic is read from the token operand
// in Operands[0] (asserted to be a Token) and compared against a hard-coded
// list of lowercase instruction names.
491 bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
493 struct ARMOperand Op0 = Operands[0];
494 assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
495 const StringRef &Mnemonic = Op0.getToken();
496 if (Mnemonic == "add" ||
497 Mnemonic == "stmfd" ||
499 Mnemonic == "ldmfd" ||
// Parse one operand into Op, dispatching on the kind of the current token:
// an identifier is tried as a register, '[' starts a memory operand, and '{'
// starts a register list. Labels and immediates are diagnosed as not yet
// supported, and any other token as unexpected.
507 // TODO - this is a work in progress
508 bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
509 switch (getLexer().getKind()) {
510 case AsmToken::Identifier:
511 if (!ParseRegister(Op))
513 // TODO parse other operands that start with an identifier like labels
514 return Error(getLexer().getTok().getLoc(), "labels not yet supported");
515 case AsmToken::LBrac:
516 if (!ParseMemory(Op))
518 case AsmToken::LCurly:
519 if (!ParseRegisterList(Op))
522 return Error(getLexer().getTok().getLoc(), "immediates not yet supported");
524 return Error(getLexer().getTok().getLoc(), "unexpected token in operand");
// Parse the operands of the instruction whose mnemonic is Name. The operand
// list starts with a token operand for the mnemonic, followed by the first
// operand and any further comma-separated operands; the list is then handed to
// MatchInstruction together with Inst.
528 bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
529 SmallVector<ARMOperand, 7> Operands;
531 Operands.push_back(ARMOperand::CreateToken(Name));
// Remember where the operands start; the final diagnostic below uses it.
533 SMLoc Loc = getLexer().getTok().getLoc();
534 if (getLexer().isNot(AsmToken::EndOfStatement)) {
536 // Read the first operand.
537 Operands.push_back(ARMOperand());
538 if (ParseOperand(Operands.back()))
541 while (getLexer().is(AsmToken::Comma)) {
542 getLexer().Lex(); // Eat the comma.
544 // Parse and remember the operand.
545 Operands.push_back(ARMOperand());
546 if (ParseOperand(Operands.back()))
550 if (!MatchInstruction(Operands, Inst))
553 Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
// Handle ARM-specific directives. ".word" is forwarded to ParseDirectiveWord
// with a size of 4 and the directive's own source location.
557 bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
558 StringRef IDVal = DirectiveID.getIdentifier();
559 if (IDVal == ".word")
560 return ParseDirectiveWord(4, DirectiveID.getLoc());
564 /// ParseDirectiveWord
565 /// ::= .word [ expression (, expression)* ]
/// Each parsed expression is passed to the streamer's EmitValue together with
/// Size. L is the location used for the "unexpected token" diagnostic;
/// ParseDirective passes the directive's own location for it.
566 bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
567 if (getLexer().isNot(AsmToken::EndOfStatement)) {
570 if (getParser().ParseExpression(Value))
573 getParser().getStreamer().EmitValue(Value, Size);
575 if (getLexer().is(AsmToken::EndOfStatement))
578 // FIXME: Improve diagnostic.
579 if (getLexer().isNot(AsmToken::Comma))
580 return Error(L, "unexpected token in directive");
589 // Force static initialization.
// Constructs a RegisterAsmParser<ARMAsmParser> object for the ARM target and
// another for the Thumb target, so the same parser class serves both.
590 extern "C" void LLVMInitializeARMAsmParser() {
591 RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
592 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);