From d4b59dcdba744d81cc6d97b1d4772d9c8b048977 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 29 May 2015 01:03:37 +0000 Subject: [PATCH] [TableGen][AsmMatcherEmitter] Only parse isolated tokens as registers. Fixes PR23455, where, when TableGen generates the matcher from the AsmString, it splits "cmp${cc}ss" into tokens, and the "ss" suffix is recognized as the SS register. I can't think of a situation where that's a feature, not a bug, hence: when a token is "isolated", i.e., it is followed and preceded by separators, it shouldn't be parsed as a register. Differential Revision: http://reviews.llvm.org/D9844 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238536 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/X86/intel-syntax.s | 3 +++ utils/TableGen/AsmMatcherEmitter.cpp | 26 ++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index fce0c65781c..30fe6c8b9b1 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -662,3 +662,6 @@ frstor dword ptr [eax] // CHECK: fnsave (%eax) // CHECK: fxrstor (%eax) // CHECK: frstor (%eax) + +// CHECK: cmpnless %xmm1, %xmm0 +cmpnless xmm0, xmm1 diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index a3932cd4e86..f038c730b14 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -310,11 +310,16 @@ struct MatchableInfo { /// The suboperand index within SrcOpName, or -1 for the entire operand. int SubOpIdx; + /// Whether the token is "isolated", i.e., it is preceded and followed + /// by separators. + bool IsIsolatedToken; + /// Register record if this token is singleton register. Record *SingletonReg; - explicit AsmOperand(StringRef T) : Token(T), Class(nullptr), SubOpIdx(-1), - SingletonReg(nullptr) {} + explicit AsmOperand(bool IsIsolatedToken, StringRef T) + : Token(T), Class(nullptr), SubOpIdx(-1), + IsIsolatedToken(IsIsolatedToken), SingletonReg(nullptr) {} }; /// ResOperand - This represents a single operand in the result instruction @@ -815,7 +820,14 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info, /// Append an AsmOperand for the given substring of AsmString. void MatchableInfo::addAsmOperand(size_t Start, size_t End) { StringRef String = AsmString; - AsmOperands.push_back(AsmOperand(String.slice(Start, End))); + StringRef Separators = "[]*! \t,"; + // Look for separators before and after to figure out is this token is + // isolated. Accept '$$' as that's how we escape '$'. + bool IsIsolatedToken = + (!Start || Separators.find(String[Start - 1]) != StringRef::npos || + String.substr(Start - 1, 2) == "$$") && + (End >= String.size() || Separators.find(String[End]) != StringRef::npos); + AsmOperands.push_back(AsmOperand(IsIsolatedToken, String.slice(Start, End))); } /// tokenizeAsmString - Tokenize a simplified assembly string. @@ -969,6 +981,12 @@ extractSingletonRegisterForAsmOperand(unsigned OperandNo, const AsmMatcherInfo &Info, std::string &RegisterPrefix) { StringRef Tok = AsmOperands[OperandNo].Token; + + // If this token is not an isolated token, i.e., it isn't separated from + // other tokens (e.g. with whitespace), don't interpret it as a register name. + if (!AsmOperands[OperandNo].IsIsolatedToken) + return; + if (RegisterPrefix.empty()) { std::string LoweredTok = Tok.lower(); if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok)) @@ -1517,7 +1535,7 @@ buildInstructionOperandReference(MatchableInfo *II, // Insert remaining suboperands after AsmOpIdx in II->AsmOperands. StringRef Token = Op->Token; // save this in case Op gets moved for (unsigned SI = 1, SE = Operands[Idx].MINumOperands; SI != SE; ++SI) { - MatchableInfo::AsmOperand NewAsmOp(Token); + MatchableInfo::AsmOperand NewAsmOp(/*IsIsolatedToken=*/true, Token); NewAsmOp.SubOpIdx = SI; II->AsmOperands.insert(II->AsmOperands.begin()+AsmOpIdx+SI, NewAsmOp); } -- 2.34.1