lib/Target/ARM/AsmParser/ARMAsmParser.cpp

   1 //===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "ARM.h"
  11 #include "llvm/ADT/SmallVector.h"
  12 #include "llvm/ADT/Twine.h"
  13 #include "llvm/MC/MCAsmLexer.h"
  14 #include "llvm/MC/MCAsmParser.h"
  15 #include "llvm/MC/MCStreamer.h"
  16 #include "llvm/MC/MCExpr.h"
  17 #include "llvm/MC/MCInst.h"
  18 #include "llvm/Support/SourceMgr.h"
  19 #include "llvm/Target/TargetRegistry.h"
  20 #include "llvm/Target/TargetAsmParser.h"
  21 using namespace llvm;
  22
  23 namespace {
  24 struct ARMOperand;
  25
  26 // The shift types for register controlled shifts in arm memory addressing
  27 enum ShiftType {
  28   Lsl,
  29   Lsr,
  30   Asr,
  31   Ror,
  32   Rrx
  33 };
  34
  35 class ARMAsmParser : public TargetAsmParser {
  36   MCAsmParser &Parser;
  37
  38 private:
  39   MCAsmParser &getParser() const { return Parser; }
  40
  41   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  42
  43   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  44
  45   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  46
  47   bool ParseRegister(ARMOperand &Op);
  48
  49   bool ParseMemory(ARMOperand &Op);
  50
  51   bool ParseShift(enum ShiftType *St, const MCExpr *ShiftAmount);
  52
  53   bool ParseOperand(ARMOperand &Op);
  54
  55   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  56
  57   // TODO - For now hacked versions of the next two are in here in this file to
  58   // allow some parser testing until the table gen versions are implemented.
  59
  60   /// @name Auto-generated Match Functions
  61   /// {
  62   bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
  63                         MCInst &Inst);
  64
  65   /// MatchRegisterName - Match the given string to a register name, or 0 if
  66   /// there is no match.
  67   unsigned MatchRegisterName(const StringRef &Name);
  68
  69   /// }
  70
  71
  72 public:
  73   ARMAsmParser(const Target &T, MCAsmParser &_Parser)
  74     : TargetAsmParser(T), Parser(_Parser) {}
  75
  76   virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
  77
  78   virtual bool ParseDirective(AsmToken DirectiveID);
  79 };
  80
  81 } // end anonymous namespace
  82
  83 namespace {
  84
  85 /// ARMOperand - Instances of this class represent a parsed ARM machine
  86 /// instruction.
  87 struct ARMOperand {
  88   enum {
  89     Token,
  90     Register,
  91     Memory
  92   } Kind;
  93
  94
  95   union {
  96     struct {
  97       const char *Data;
  98       unsigned Length;
  99     } Tok;
 100
 101     struct {
 102       unsigned RegNum;
 103     } Reg;
 104
 105     // This is for all forms of ARM address expressions
 106     struct {
 107       unsigned BaseRegNum;
 108       bool OffsetIsReg;
 109       const MCExpr *Offset; // used when OffsetIsReg is false
 110       unsigned OffsetRegNum; // used when OffsetIsReg is true
 111       bool OffsetRegShifted; // only used when OffsetIsReg is true
 112       enum ShiftType ShiftType;  // used when OffsetRegShifted is true
 113       const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
 114       bool Preindexed;
 115       bool Postindexed;
 116       bool Negative; // only used when OffsetIsReg is true
 117       bool Writeback;
 118     } Mem;
 119
 120   };
 121
 122   StringRef getToken() const {
 123     assert(Kind == Token && "Invalid access!");
 124     return StringRef(Tok.Data, Tok.Length);
 125   }
 126
 127   unsigned getReg() const {
 128     assert(Kind == Register && "Invalid access!");
 129     return Reg.RegNum;
 130   }
 131
 132   bool isToken() const {return Kind == Token; }
 133
 134   bool isReg() const { return Kind == Register; }
 135
 136   void addRegOperands(MCInst &Inst, unsigned N) const {
 137     assert(N == 1 && "Invalid number of operands!");
 138     Inst.addOperand(MCOperand::CreateReg(getReg()));
 139   }
 140
 141   static ARMOperand CreateToken(StringRef Str) {
 142     ARMOperand Res;
 143     Res.Kind = Token;
 144     Res.Tok.Data = Str.data();
 145     Res.Tok.Length = Str.size();
 146     return Res;
 147   }
 148
 149   static ARMOperand CreateReg(unsigned RegNum) {
 150     ARMOperand Res;
 151     Res.Kind = Register;
 152     Res.Reg.RegNum = RegNum;
 153     return Res;
 154   }
 155
 156   static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
 157                               const MCExpr *Offset, unsigned OffsetRegNum,
 158                               bool OffsetRegShifted, enum ShiftType ShiftType,
 159                               const MCExpr *ShiftAmount, bool Preindexed,
 160                               bool Postindexed, bool Negative, bool Writeback) {
 161     ARMOperand Res;
 162     Res.Kind = Memory;
 163     Res.Mem.BaseRegNum = BaseRegNum;
 164     Res.Mem.OffsetIsReg = OffsetIsReg;
 165     Res.Mem.Offset = Offset;
 166     Res.Mem.OffsetRegNum = OffsetRegNum;
 167     Res.Mem.OffsetRegShifted = OffsetRegShifted;
 168     Res.Mem.ShiftType = ShiftType;
 169     Res.Mem.ShiftAmount = ShiftAmount;
 170     Res.Mem.Preindexed = Preindexed;
 171     Res.Mem.Postindexed = Postindexed;
 172     Res.Mem.Negative = Negative;
 173     Res.Mem.Writeback = Writeback;
 174     return Res;
 175   }
 176 };
 177
 178 } // end anonymous namespace.
 179
 180 // Try to parse a register name.  The token must be an Identifier when called,
 181 // and if it is a register name a Reg operand is created, the token is eaten
 182 // and false is returned.  Else true is returned and no token is eaten.
 183 // TODO this is likely to change to allow different register types and or to
 184 // parse for a specific register type.
 185 bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
 186   const AsmToken &Tok = getLexer().getTok();
 187   assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
 188
 189   // FIXME: Validate register for the current architecture; we have to do
 190   // validation later, so maybe there is no need for this here.
 191   unsigned RegNum;
 192
 193   RegNum = MatchRegisterName(Tok.getString());
 194   if (RegNum == 0)
 195     return true;
 196
 197   Op = ARMOperand::CreateReg(RegNum);
 198   getLexer().Lex(); // Eat identifier token.
 199
 200   return false;
 201 }
 202
 203 // Try to parse an arm memory expression.  It must start with a '[' token.
 204 // TODO Only preindexing and postindexing addressing are started, unindexed
 205 // with option, etc are still to do.
 206 bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
 207   const AsmToken &LBracTok = getLexer().getTok();
 208   assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket");
 209   getLexer().Lex(); // Eat left bracket token.
 210
 211   const AsmToken &BaseRegTok = getLexer().getTok();
 212   if (BaseRegTok.isNot(AsmToken::Identifier))
 213     return Error(BaseRegTok.getLoc(), "register expected");
 214   unsigned BaseRegNum = MatchRegisterName(BaseRegTok.getString());
 215   if (BaseRegNum == 0)
 216     return Error(BaseRegTok.getLoc(), "register expected");
 217   getLexer().Lex(); // Eat identifier token.
 218
 219   bool Preindexed = false;
 220   bool Postindexed = false;
 221   bool OffsetIsReg = false;
 222   bool Negative = false;
 223   bool Writeback = false;
 224
 225   // First look for preindexed address forms:
 226   //  [Rn, +/-Rm]
 227   //  [Rn, #offset]
 228   //  [Rn, +/-Rm, shift]
 229   // that is after the "[Rn" we now have see if the next token is a comma.
 230   const AsmToken &Tok = getLexer().getTok();
 231   if (Tok.is(AsmToken::Comma)) {
 232     Preindexed = true;
 233     getLexer().Lex(); // Eat comma token.
 234
 235     const AsmToken &NextTok = getLexer().getTok();
 236     if (NextTok.is(AsmToken::Plus))
 237       getLexer().Lex(); // Eat plus token.
 238     else if (NextTok.is(AsmToken::Minus)) {
 239       Negative = true;
 240       getLexer().Lex(); // Eat minus token
 241     }
 242
 243     // See if there is a register following the "[Rn," we have so far.
 244     const AsmToken &OffsetRegTok = getLexer().getTok();
 245     unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
 246     bool OffsetRegShifted = false;
 247     enum ShiftType ShiftType;
 248     const MCExpr *ShiftAmount;
 249     const MCExpr *Offset;
 250     if (OffsetRegNum != 0) {
 251       OffsetIsReg = true;
 252       getLexer().Lex(); // Eat identifier token for the offset register.
 253       // Look for a comma then a shift
 254       const AsmToken &Tok = getLexer().getTok();
 255       if (Tok.is(AsmToken::Comma)) {
 256         getLexer().Lex(); // Eat comma token.
 257
 258         const AsmToken &Tok = getLexer().getTok();
 259         if (ParseShift(&ShiftType, ShiftAmount))
 260           return Error(Tok.getLoc(), "shift expected");
 261         OffsetRegShifted = true;
 262       }
 263     }
 264     else { // "[Rn," we have so far was not followed by "Rm"
 265       // Look for #offset following the "[Rn,"
 266       const AsmToken &HashTok = getLexer().getTok();
 267       if (HashTok.isNot(AsmToken::Hash))
 268         return Error(HashTok.getLoc(), "'#' expected");
 269       getLexer().Lex(); // Eat hash token.
 270
 271       if (getParser().ParseExpression(Offset))
 272        return true;
 273     }
 274     const AsmToken &RBracTok = getLexer().getTok();
 275     if (RBracTok.isNot(AsmToken::RBrac))
 276       return Error(RBracTok.getLoc(), "']' expected");
 277     getLexer().Lex(); // Eat right bracket token.
 278
 279     const AsmToken &ExclaimTok = getLexer().getTok();
 280     if (ExclaimTok.is(AsmToken::Exclaim)) {
 281       Writeback = true;
 282       getLexer().Lex(); // Eat exclaim token
 283     }
 284     Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
 285                                OffsetRegShifted, ShiftType, ShiftAmount,
 286                                Preindexed, Postindexed, Negative, Writeback);
 287     return false;
 288   }
 289   // The "[Rn" we have so far was not followed by a comma.
 290   else if (Tok.is(AsmToken::RBrac)) {
 291     // This is a post indexing addressing forms:
 292     //  [Rn], #offset
 293     //  [Rn], +/-Rm
 294     //  [Rn], +/-Rm, shift
 295     // that is a ']' follows after the "[Rn".
 296     Postindexed = true;
 297     Writeback = true;
 298     getLexer().Lex(); // Eat right bracket token.
 299
 300     const AsmToken &CommaTok = getLexer().getTok();
 301     if (CommaTok.isNot(AsmToken::Comma))
 302       return Error(CommaTok.getLoc(), "',' expected");
 303     getLexer().Lex(); // Eat comma token.
 304
 305     const AsmToken &NextTok = getLexer().getTok();
 306     if (NextTok.is(AsmToken::Plus))
 307       getLexer().Lex(); // Eat plus token.
 308     else if (NextTok.is(AsmToken::Minus)) {
 309       Negative = true;
 310       getLexer().Lex(); // Eat minus token
 311     }
 312
 313     // See if there is a register following the "[Rn]," we have so far.
 314     const AsmToken &OffsetRegTok = getLexer().getTok();
 315     unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
 316     bool OffsetRegShifted = false;
 317     enum ShiftType ShiftType;
 318     const MCExpr *ShiftAmount;
 319     const MCExpr *Offset;
 320     if (OffsetRegNum != 0) {
 321       OffsetIsReg = true;
 322       getLexer().Lex(); // Eat identifier token for the offset register.
 323       // Look for a comma then a shift
 324       const AsmToken &Tok = getLexer().getTok();
 325       if (Tok.is(AsmToken::Comma)) {
 326         getLexer().Lex(); // Eat comma token.
 327
 328         const AsmToken &Tok = getLexer().getTok();
 329         if (ParseShift(&ShiftType, ShiftAmount))
 330           return Error(Tok.getLoc(), "shift expected");
 331         OffsetRegShifted = true;
 332       }
 333     }
 334     else { // "[Rn]," we have so far was not followed by "Rm"
 335       // Look for #offset following the "[Rn],"
 336       const AsmToken &HashTok = getLexer().getTok();
 337       if (HashTok.isNot(AsmToken::Hash))
 338         return Error(HashTok.getLoc(), "'#' expected");
 339       getLexer().Lex(); // Eat hash token.
 340
 341       if (getParser().ParseExpression(Offset))
 342        return true;
 343     }
 344     Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
 345                                OffsetRegShifted, ShiftType, ShiftAmount,
 346                                Preindexed, Postindexed, Negative, Writeback);
 347     return false;
 348   }
 349
 350   return true;
 351 }
 352
 353 /// ParseShift as one of these two:
 354 ///   ( lsl | lsr | asr | ror ) , # shift_amount
 355 ///   rrx
 356 /// and returns true if it parses a shift otherwise it returns false.
 357 bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *ShiftAmount) {
 358   const AsmToken &Tok = getLexer().getTok();
 359   if (Tok.isNot(AsmToken::Identifier))
 360     return true;
 361   const StringRef &ShiftName = Tok.getString();
 362   if (ShiftName == "lsl" || ShiftName == "LSL")
 363     *St = Lsl;
 364   else if (ShiftName == "lsr" || ShiftName == "LSR")
 365     *St = Lsr;
 366   else if (ShiftName == "asr" || ShiftName == "ASR")
 367     *St = Asr;
 368   else if (ShiftName == "ror" || ShiftName == "ROR")
 369     *St = Ror;
 370   else if (ShiftName == "rrx" || ShiftName == "RRX")
 371     *St = Rrx;
 372   else
 373     return true;
 374   getLexer().Lex(); // Eat shift type token.
 375
 376   // For all but a Rotate right there must be a '#' and a shift amount
 377   if (*St != Rrx) {
 378     // Look for # following the shift type
 379     const AsmToken &HashTok = getLexer().getTok();
 380     if (HashTok.isNot(AsmToken::Hash))
 381       return Error(HashTok.getLoc(), "'#' expected");
 382     getLexer().Lex(); // Eat hash token.
 383
 384     if (getParser().ParseExpression(ShiftAmount))
 385       return true;
 386   }
 387
 388   return false;
 389 }
 390
 391 // A hack to allow some testing
 392 unsigned ARMAsmParser::MatchRegisterName(const StringRef &Name) {
 393   if (Name == "r1")
 394     return 1;
 395   else if (Name == "r2")
 396     return 2;
 397   else if (Name == "r3")
 398     return 3;
 399   return 0;
 400 }
 401
 402 // A hack to allow some testing
 403 bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
 404                                     MCInst &Inst) {
 405   struct ARMOperand Op0 = Operands[0];
 406   assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
 407   const StringRef &Mnemonic = Op0.getToken();
 408   if (Mnemonic == "add" ||
 409       Mnemonic == "ldr")
 410     return false;
 411
 412   return true;
 413 }
 414
 415 // TODO - this is a work in progress
 416 bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
 417   switch (getLexer().getKind()) {
 418   case AsmToken::Identifier:
 419     if (!ParseRegister(Op))
 420       return false;
 421     // TODO parse other operands that start with an identifier
 422     return true;
 423   case AsmToken::LBrac:
 424     if (!ParseMemory(Op))
 425       return false;
 426   default:
 427     return true;
 428   }
 429 }
 430
 431 bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
 432   SmallVector<ARMOperand, 7> Operands;
 433
 434   Operands.push_back(ARMOperand::CreateToken(Name));
 435
 436   SMLoc Loc = getLexer().getTok().getLoc();
 437   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 438
 439     // Read the first operand.
 440     Operands.push_back(ARMOperand());
 441     if (ParseOperand(Operands.back()))
 442       return true;
 443
 444     while (getLexer().is(AsmToken::Comma)) {
 445       getLexer().Lex();  // Eat the comma.
 446
 447       // Parse and remember the operand.
 448       Operands.push_back(ARMOperand());
 449       if (ParseOperand(Operands.back()))
 450         return true;
 451     }
 452   }
 453   if (!MatchInstruction(Operands, Inst))
 454     return false;
 455
 456   Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
 457   return true;
 458 }
 459
 460 bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
 461   StringRef IDVal = DirectiveID.getIdentifier();
 462   if (IDVal == ".word")
 463     return ParseDirectiveWord(4, DirectiveID.getLoc());
 464   return true;
 465 }
 466
 467 /// ParseDirectiveWord
 468 ///  ::= .word [ expression (, expression)* ]
 469 bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 470   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 471     for (;;) {
 472       const MCExpr *Value;
 473       if (getParser().ParseExpression(Value))
 474         return true;
 475
 476       getParser().getStreamer().EmitValue(Value, Size);
 477
 478       if (getLexer().is(AsmToken::EndOfStatement))
 479         break;
 480
 481       // FIXME: Improve diagnostic.
 482       if (getLexer().isNot(AsmToken::Comma))
 483         return Error(L, "unexpected token in directive");
 484       getLexer().Lex();
 485     }
 486   }
 487
 488   getLexer().Lex();
 489   return false;
 490 }
 491
 492 // Force static initialization.
 493 extern "C" void LLVMInitializeARMAsmParser() {
 494   RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
 495   RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
 496 }