From c41938303b08199793b4370eb667d5bd9b97e61c Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 22 Jun 2009 05:51:26 +0000 Subject: [PATCH] implement memory operand parsing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73875 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-mc/AsmParser.cpp | 144 ++++++++++++++++++++++++++++++++---- tools/llvm-mc/AsmParser.h | 2 + 2 files changed, 132 insertions(+), 14 deletions(-) diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp index 1ab1af48ad0..fc226a5c9a7 100644 --- a/tools/llvm-mc/AsmParser.cpp +++ b/tools/llvm-mc/AsmParser.cpp @@ -48,10 +48,12 @@ void AsmParser::EatToEndOfStatement() { Lexer.Lex(); } + struct AsmParser::X86Operand { enum { Register, - Immediate + Immediate, + Memory } Kind; union { @@ -63,6 +65,14 @@ struct AsmParser::X86Operand { // FIXME: Should be a general expression. int64_t Val; } Imm; + + struct { + unsigned SegReg; + int64_t Disp; // FIXME: Should be a general expression. + unsigned BaseReg; + unsigned Scale; + unsigned ScaleReg; + } Mem; }; static X86Operand CreateReg(unsigned RegNo) { @@ -77,33 +87,39 @@ struct AsmParser::X86Operand { Res.Imm.Val = Val; return Res; } + static X86Operand CreateMem(unsigned SegReg, int64_t Disp, unsigned BaseReg, + unsigned Scale, unsigned ScaleReg) { + X86Operand Res; + Res.Kind = Memory; + Res.Mem.SegReg = SegReg; + Res.Mem.Disp = Disp; + Res.Mem.BaseReg = BaseReg; + Res.Mem.Scale = Scale; + Res.Mem.ScaleReg = ScaleReg; + return Res; + } }; bool AsmParser::ParseX86Operand(X86Operand &Op) { switch (Lexer.getKind()) { default: - return TokError("unknown token at start of instruction operand"); + return ParseX86MemOperand(Op); case asmtok::Register: // FIXME: Decode reg #. + // FIXME: if a segment register, this could either be just the seg reg, or + // the start of a memory operand. Op = X86Operand::CreateReg(0); Lexer.Lex(); // Eat register. return false; - case asmtok::Dollar: + case asmtok::Dollar: { // $42 -> immediate. 
Lexer.Lex(); - // FIXME: Parse an arbitrary expression here, like $(4+5) - if (Lexer.isNot(asmtok::IntVal)) + int64_t Val; + if (ParseExpression(Val)) return TokError("expected integer constant"); - - Op = X86Operand::CreateReg(Lexer.getCurIntVal()); - Lexer.Lex(); // Eat register. - return false; - case asmtok::Identifier: - // This is a label, this should be parsed as part of an expression, to - // handle things like LFOO+4 - Op = X86Operand::CreateImm(0); // FIXME. - Lexer.Lex(); // Eat identifier. + Op = X86Operand::CreateImm(Val); return false; + } //case asmtok::Star: // * %eax @@ -112,7 +128,107 @@ bool AsmParser::ParseX86Operand(X86Operand &Op) { } } +/// ParseX86MemOperand: segment: disp(basereg, indexreg, scale) +bool AsmParser::ParseX86MemOperand(X86Operand &Op) { + // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. + unsigned SegReg = 0; + + + // We have to disambiguate a parenthesized expression "(4+5)" from the start + // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The + // only way to do this without lookahead is to eat the ( and see what is after + // it. + int64_t Disp = 0; + if (Lexer.isNot(asmtok::LParen)) { + if (ParseExpression(Disp)) return true; + + // After parsing the base expression we could either have a parenthesized + // memory address or not. If not, return now. If so, eat the (. + if (Lexer.isNot(asmtok::LParen)) { + Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0); + return false; + } + + // Eat the '('. + Lexer.Lex(); + } else { + // Okay, we have a '('. We don't know if this is an expression or not, + // so we have to eat the ( to see beyond it. + Lexer.Lex(); // Eat the '('. + + if (Lexer.is(asmtok::Register) || Lexer.is(asmtok::Comma)) { + // Nothing to do here, fall into the code below with the '(' part of the + // memory operand consumed. + } else { + // FIXME: Call ParseParenExpression with the leading ( consumed. 
+ return TokError("FIXME: Paren expr not implemented yet!"); + } + } + + // If we reached here, then we just ate the ( of the memory operand. Process + // the rest of the memory operand. + unsigned BaseReg = 0, ScaleReg = 0, Scale = 0; + + if (Lexer.is(asmtok::Register)) { + BaseReg = 123; // FIXME: decode reg # + Lexer.Lex(); // eat the register. + } + + if (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // eat the comma. + + if (Lexer.is(asmtok::Register)) { + ScaleReg = 123; // FIXME: decode reg # + Lexer.Lex(); // eat the register. + Scale = 1; // If not specified, the scale defaults to 1. + } + + if (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // eat the comma. + + // If present, get and validate scale amount. + if (Lexer.is(asmtok::IntVal)) { + int64_t ScaleVal = Lexer.getCurIntVal(); + if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) + return TokError("scale factor in address must be 1, 2, 4 or 8"); + Lexer.Lex(); // eat the scale. + Scale = (unsigned)ScaleVal; + } + } + } + + // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. + if (Lexer.isNot(asmtok::RParen)) + return TokError("unexpected token in memory operand"); + Lexer.Lex(); // Eat the ')'. + + Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, Scale, ScaleReg); + return false; +} + +/// ParseExpression - Parse an expression and return it. +/// FIXME: This should handle real expressions, we do something trivial for now. +bool AsmParser::ParseExpression(int64_t &Res) { + switch (Lexer.getKind()) { + default: + return TokError("unknown token in expression"); + case asmtok::Identifier: + // This is a label, this should be parsed as part of an expression, to + // handle things like LFOO+4 + Res = 0; // FIXME. + Lexer.Lex(); // Eat identifier. + return false; + case asmtok::IntVal: + Res = Lexer.getCurIntVal(); + Lexer.Lex(); // Eat integer. + return false; + } +} + + + + /// ParseStatement: /// ::= EndOfStatement /// ::= Label* Directive ...Operands... 
EndOfStatement diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h index 54e3741778e..670d987bfc3 100644 --- a/tools/llvm-mc/AsmParser.h +++ b/tools/llvm-mc/AsmParser.h @@ -37,6 +37,8 @@ private: void EatToEndOfStatement(); bool ParseX86Operand(X86Operand &Op); + bool ParseX86MemOperand(X86Operand &Op); + bool ParseExpression(int64_t &Res); }; } // end namespace llvm -- 2.34.1