add_llvm_library(LLVMMIRParser
+ MILexer.cpp
MIParser.cpp
MIRParser.cpp
)
--- /dev/null
+//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lexing of machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MILexer.h"
+#include "llvm/ADT/Twine.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+
+/// This class provides a way to iterate and get characters from the source
+/// string.
+class Cursor {
+ const char *Ptr;
+ const char *End;
+
+public:
+ explicit Cursor(StringRef Str) {
+ Ptr = Str.data();
+ End = Ptr + Str.size();
+ }
+
+ bool isEOF() const { return Ptr == End; }
+
+ char peek() const { return isEOF() ? 0 : *Ptr; }
+
+ void advance() { ++Ptr; }
+
+ StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
+
+ StringRef upto(Cursor C) const {
+ assert(C.Ptr >= Ptr && C.Ptr <= End);
+ return StringRef(Ptr, C.Ptr - Ptr);
+ }
+
+ StringRef::iterator location() const { return Ptr; }
+};
+
+} // end anonymous namespace
+
+/// Skip the leading whitespace characters and return the updated cursor.
+static Cursor skipWhitespace(Cursor C) {
+ while (isspace(C.peek()))
+ C.advance();
+ return C;
+}
+
+static bool isIdentifierChar(char C) {
+ return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.';
+}
+
+static Cursor lexIdentifier(Cursor C, MIToken &Token) {
+ auto Range = C;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ Token = MIToken(MIToken::Identifier, Range.upto(C));
+ return C;
+}
+
+StringRef llvm::lexMIToken(
+ StringRef Source, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ auto C = skipWhitespace(Cursor(Source));
+ if (C.isEOF()) {
+ Token = MIToken(MIToken::Eof, C.remaining());
+ return C.remaining();
+ }
+
+ auto Char = C.peek();
+ if (isalpha(Char) || Char == '_')
+ return lexIdentifier(C, Token).remaining();
+ Token = MIToken(MIToken::Error, C.remaining());
+ ErrorCallback(C.location(),
+ Twine("unexpected character '") + Twine(Char) + "'");
+ return C.remaining();
+}
--- /dev/null
+//===- MILexer.h - Lexer for machine instructions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that lexes the machine instruction source
+// string.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include <functional>
+
+namespace llvm {
+
+class Twine;
+
+/// A token produced by the machine instruction lexer.
+struct MIToken {
+ enum TokenKind {
+ // Markers
+ Eof,
+ Error,
+
+ // Identifier tokens
+ Identifier
+ };
+
+private:
+ TokenKind Kind;
+ StringRef Range;
+
+public:
+ MIToken(TokenKind Kind, StringRef Range) : Kind(Kind), Range(Range) {}
+
+ TokenKind kind() const { return Kind; }
+
+ bool isError() const { return Kind == Error; }
+
+ bool is(TokenKind K) const { return Kind == K; }
+
+ bool isNot(TokenKind K) const { return Kind != K; }
+
+ StringRef::iterator location() const { return Range.begin(); }
+
+ StringRef stringValue() const { return Range; }
+};
+
+/// Consume a single machine instruction token in the given source and return
+/// the remaining source string.
+StringRef lexMIToken(
+ StringRef Source, MIToken &Token,
+ function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
+
+} // end namespace llvm
+
+#endif
//===----------------------------------------------------------------------===//
#include "MIParser.h"
+#include "MILexer.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
SourceMgr &SM;
MachineFunction &MF;
SMDiagnostic &Error;
- StringRef Source;
+ StringRef Source, CurrentSource;
+ MIToken Token;
/// Maps from instruction names to op codes.
StringMap<unsigned> Names2InstrOpCodes;
MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
StringRef Source);
+ void lex();
+
/// Report an error at the current location with the given message.
///
/// This function always return true.
bool error(const Twine &Msg);
+ /// Report an error at the given location with the given message.
+ ///
+ /// This function always return true.
+ bool error(StringRef::iterator Loc, const Twine &Msg);
+
MachineInstr *parse();
private:
/// Try to convert an instruction name to an opcode. Return true if the
/// instruction name is invalid.
bool parseInstrName(StringRef InstrName, unsigned &OpCode);
+
+ bool parseInstruction(unsigned &OpCode);
};
} // end anonymous namespace
MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
StringRef Source)
- : SM(SM), MF(MF), Error(Error), Source(Source) {}
+ : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
+ Token(MIToken::Error, StringRef()) {}
+
+void MIParser::lex() {
+ CurrentSource = lexMIToken(
+ CurrentSource, Token,
+ [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
+}
+
+bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
-bool MIParser::error(const Twine &Msg) {
+bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
// TODO: Get the proper location in the MIR file, not just a location inside
// the string.
- Error =
- SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID())
- ->getBufferIdentifier(),
- 1, 0, SourceMgr::DK_Error, Msg.str(), Source, None, None);
+ assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
+ Error = SMDiagnostic(
+ SM, SMLoc(),
+ SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
+ Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
return true;
}
MachineInstr *MIParser::parse() {
- StringRef InstrName = Source;
+ lex();
+
unsigned OpCode;
- if (parseInstrName(InstrName, OpCode)) {
- error(Twine("unknown machine instruction name '") + InstrName + "'");
+ if (Token.isError() || parseInstruction(OpCode))
return nullptr;
- }
// TODO: Parse the rest of instruction - machine operands, etc.
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
return MI;
}
+bool MIParser::parseInstruction(unsigned &OpCode) {
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected a machine instruction");
+ StringRef InstrName = Token.stringValue();
+ if (parseInstrName(InstrName, OpCode))
+ return error(Twine("unknown machine instruction name '") + InstrName + "'");
+ return false;
+}
+
void MIParser::initNames2InstrOpCodes() {
if (!Names2InstrOpCodes.empty())
return;
# CHECK: - IMUL32rri8
# CHECK-NEXT: - RETQ
- IMUL32rri8
- - RETQ
+ - ' RETQ '
...
--- /dev/null
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @foo() {
+ entry:
+ ret void
+ }
+
+...
+---
+name: foo
+body:
+ - name: entry
+ instructions:
+ # CHECK: 1:1: expected a machine instruction
+ - ''
+...
--- /dev/null
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @foo() {
+ entry:
+ ret void
+ }
+
+...
+---
+name: foo
+body:
+ - name: entry
+ instructions:
+ # CHECK: 1:1: unexpected character '`'
+ - '` RETQ'
+...