1 //===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This tablegen backend emits a target specifier matcher for converting parsed
11 // assembly operands in the MCInst structures.
13 // The input to the target specific matcher is a list of literal tokens and
14 // operands. The target specific parser should generally eliminate any syntax
15 // which is not relevant for matching; for example, comma tokens should have
16 // already been consumed and eliminated by the parser. Most instructions will
17 // end up with a single literal token (the instruction name) and some number of
20 // Some example inputs, for X86:
21 // 'addl' (immediate ...) (register ...)
22 // 'add' (immediate ...) (memory ...)
25 // The assembly matcher is responsible for converting this input into a precise
26 // machine instruction (i.e., an instruction with a well defined encoding). This
27 // mapping has several properties which complicate matching:
29 // - It may be ambiguous; many architectures can legally encode particular
30 // variants of an instruction in different ways (for example, using a smaller
31 // encoding for small immediates). Such ambiguities should never be
32 // arbitrarily resolved by the assembler, the assembler is always responsible
33 // for choosing the "best" available instruction.
35 // - It may depend on the subtarget or the assembler context. Instructions
36 // which are invalid for the current mode, but otherwise unambiguous (e.g.,
37 // an SSE instruction in a file being assembled for i486) should be accepted
38 // and rejected by the assembler front end. However, if the proper encoding
39 // for an instruction is dependent on the assembler context then the matcher
40 // is responsible for selecting the correct machine instruction for the
43 // The core matching algorithm attempts to exploit the regularity in most
44 // instruction sets to quickly determine the set of possibly matching
45 // instructions, and the simplify the generated code. Additionally, this helps
46 // to ensure that the ambiguities are intentionally resolved by the user.
48 // The matching is divided into two distinct phases:
50 // 1. Classification: Each operand is mapped to the unique set which (a)
51 // contains it, and (b) is the largest such subset for which a single
52 // instruction could match all members.
54 // For register classes, we can generate these subgroups automatically. For
55 // arbitrary operands, we expect the user to define the classes and their
56 // relations to one another (for example, 8-bit signed immediates as a
57 // subset of 32-bit immediates).
59 // By partitioning the operands in this way, we guarantee that for any
60 // tuple of classes, any single instruction must match either all or none
61 // of the sets of operands which could classify to that tuple.
63 // In addition, the subset relation amongst classes induces a partial order
64 // on such tuples, which we use to resolve ambiguities.
66 // FIXME: What do we do if a crazy case shows up where this is the wrong
69 // 2. The input can now be treated as a tuple of classes (static tokens are
70 // simple singleton sets). Each such tuple should generally map to a single
71 // instruction (we currently ignore cases where this isn't true, whee!!!),
72 // which we can emit a simple matcher for.
74 //===----------------------------------------------------------------------===//
76 #include "AsmMatcherEmitter.h"
77 #include "CodeGenTarget.h"
79 #include "llvm/ADT/OwningPtr.h"
80 #include "llvm/ADT/SmallVector.h"
81 #include "llvm/ADT/StringExtras.h"
82 #include "llvm/Support/CommandLine.h"
83 #include "llvm/Support/Debug.h"
89 static cl::opt<std::string>
90 MatchOneInstr("match-one-instr", cl::desc("Match only the named instruction"),
94 /// FlattenVariants - Flatten an .td file assembly string by selecting the
95 /// variant at index \arg N.
96 static std::string FlattenVariants(const std::string &AsmString,
98 StringRef Cur = AsmString;
102 // Find the start of the next variant string.
103 size_t VariantsStart = 0;
104 for (size_t e = Cur.size(); VariantsStart != e; ++VariantsStart)
105 if (Cur[VariantsStart] == '{' &&
106 (VariantsStart == 0 || (Cur[VariantsStart-1] != '$' &&
107 Cur[VariantsStart-1] != '\\')))
110 // Add the prefix to the result.
111 Res += Cur.slice(0, VariantsStart);
112 if (VariantsStart == Cur.size())
115 ++VariantsStart; // Skip the '{'.
117 // Scan to the end of the variants string.
118 size_t VariantsEnd = VariantsStart;
119 unsigned NestedBraces = 1;
120 for (size_t e = Cur.size(); VariantsEnd != e; ++VariantsEnd) {
121 if (Cur[VariantsEnd] == '}' && Cur[VariantsEnd-1] != '\\') {
122 if (--NestedBraces == 0)
124 } else if (Cur[VariantsEnd] == '{')
128 // Select the Nth variant (or empty).
129 StringRef Selection = Cur.slice(VariantsStart, VariantsEnd);
130 for (unsigned i = 0; i != N; ++i)
131 Selection = Selection.split('|').second;
132 Res += Selection.split('|').first;
134 assert(VariantsEnd != Cur.size() &&
135 "Unterminated variants in assembly string!");
136 Cur = Cur.substr(VariantsEnd + 1);
142 /// TokenizeAsmString - Tokenize a simplified assembly string.
143 static void TokenizeAsmString(const StringRef &AsmString,
144 SmallVectorImpl<StringRef> &Tokens) {
147 for (unsigned i = 0, e = AsmString.size(); i != e; ++i) {
148 switch (AsmString[i]) {
157 Tokens.push_back(AsmString.slice(Prev, i));
160 if (!isspace(AsmString[i]) && AsmString[i] != ',')
161 Tokens.push_back(AsmString.substr(i, 1));
167 Tokens.push_back(AsmString.slice(Prev, i));
171 assert(i != AsmString.size() && "Invalid quoted character");
172 Tokens.push_back(AsmString.substr(i, 1));
177 // If this isn't "${", treat like a normal token.
178 if (i + 1 == AsmString.size() || AsmString[i + 1] != '{') {
180 Tokens.push_back(AsmString.slice(Prev, i));
188 Tokens.push_back(AsmString.slice(Prev, i));
192 StringRef::iterator End =
193 std::find(AsmString.begin() + i, AsmString.end(), '}');
194 assert(End != AsmString.end() && "Missing brace in operand reference!");
195 size_t EndPos = End - AsmString.begin();
196 Tokens.push_back(AsmString.slice(i, EndPos+1));
206 if (InTok && Prev != AsmString.size())
207 Tokens.push_back(AsmString.substr(Prev));
210 static bool IsAssemblerInstruction(const StringRef &Name,
211 const CodeGenInstruction &CGI,
212 const SmallVectorImpl<StringRef> &Tokens) {
213 // Ignore psuedo ops.
215 // FIXME: This is a hack.
216 if (const RecordVal *Form = CGI.TheDef->getValue("Form"))
217 if (Form->getValue()->getAsString() == "Pseudo")
220 // Ignore "PHI" node.
222 // FIXME: This is also a hack.
226 // Ignore instructions with no .s string.
228 // FIXME: What are these?
229 if (CGI.AsmString.empty())
232 // FIXME: Hack; ignore any instructions with a newline in them.
233 if (std::find(CGI.AsmString.begin(),
234 CGI.AsmString.end(), '\n') != CGI.AsmString.end())
237 // Ignore instructions with attributes, these are always fake instructions for
238 // simplifying codegen.
240 // FIXME: Is this true?
242 // Also, we ignore instructions which reference the operand multiple times;
243 // this implies a constraint we would not currently honor. These are
244 // currently always fake instructions for simplifying codegen.
246 // FIXME: Encode this assumption in the .td, so we can error out here.
247 std::set<std::string> OperandNames;
248 for (unsigned i = 1, e = Tokens.size(); i < e; ++i) {
249 if (Tokens[i][0] == '$' &&
250 std::find(Tokens[i].begin(),
251 Tokens[i].end(), ':') != Tokens[i].end()) {
253 errs() << "warning: '" << Name << "': "
254 << "ignoring instruction; operand with attribute '"
255 << Tokens[i] << "', \n";
260 if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) {
262 errs() << "warning: '" << Name << "': "
263 << "ignoring instruction; tied operand '"
264 << Tokens[i] << "', \n";
275 struct InstructionInfo {
283 /// Operand - The tablegen operand this class corresponds to.
284 const CodeGenInstruction::OperandInfo *Operand;
286 /// ClassName - The name of this operand's class.
287 std::string ClassName;
289 /// PredicateMethod - The name of the operand method to test whether the
290 /// operand matches this class.
291 std::string PredicateMethod;
293 /// RenderMethod - The name of the operand method to add this operand to
295 std::string RenderMethod;
299 /// InstrName - The target name for this instruction.
300 std::string InstrName;
302 /// Instr - The instruction this matches.
303 const CodeGenInstruction *Instr;
305 /// AsmString - The assembly string for this instruction (with variants
307 std::string AsmString;
309 /// Tokens - The tokenized assembly pattern that this instruction matches.
310 SmallVector<StringRef, 4> Tokens;
312 /// Operands - The operands that this instruction matches.
313 SmallVector<Operand, 4> Operands;
315 /// ConversionFn - The name of the conversion function to convert parsed
316 /// operands into an MCInst for this function.
317 std::string ConversionFn;
319 /// OrderedClassOperands - The indices of the class operands, ordered by their
320 /// MIOperandNo order (which is the order they should be passed to the
321 /// conversion function).
322 SmallVector<unsigned, 4> OrderedClassOperands;
330 void InstructionInfo::dump() {
331 errs() << InstrName << " -- " << "flattened:\"" << AsmString << '\"'
333 for (unsigned i = 0, e = Tokens.size(); i != e; ++i) {
340 for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
341 Operand &Op = Operands[i];
342 errs() << " op[" << i << "] = ";
343 if (Op.Kind == Operand::Token) {
344 errs() << '\"' << Tokens[i] << "\"\n";
348 assert(Op.Kind == Operand::Class && "Invalid kind!");
349 const CodeGenInstruction::OperandInfo &OI = *Op.AsClass.Operand;
350 errs() << OI.Name << " " << OI.Rec->getName()
351 << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n";
355 static void BuildInstructionInfos(CodeGenTarget &Target,
356 std::vector<InstructionInfo*> &Infos) {
357 const std::map<std::string, CodeGenInstruction> &Instructions =
358 Target.getInstructions();
360 for (std::map<std::string, CodeGenInstruction>::const_iterator
361 it = Instructions.begin(), ie = Instructions.end(); it != ie; ++it) {
362 const CodeGenInstruction &CGI = it->second;
364 if (!MatchOneInstr.empty() && it->first != MatchOneInstr)
367 OwningPtr<InstructionInfo> II(new InstructionInfo);
369 II->InstrName = it->first;
370 II->Instr = &it->second;
371 II->AsmString = FlattenVariants(CGI.AsmString, 0);
373 TokenizeAsmString(II->AsmString, II->Tokens);
375 // Ignore instructions which shouldn't be matched.
376 if (!IsAssemblerInstruction(it->first, CGI, II->Tokens))
379 for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) {
380 StringRef Token = II->Tokens[i];
382 // Check for simple tokens.
383 if (Token[0] != '$') {
384 InstructionInfo::Operand Op;
385 Op.Kind = InstructionInfo::Operand::Token;
386 II->Operands.push_back(Op);
390 // Otherwise this is an operand reference.
391 InstructionInfo::Operand Op;
392 Op.Kind = InstructionInfo::Operand::Class;
394 StringRef OperandName;
396 OperandName = Token.substr(2, Token.size() - 3);
398 OperandName = Token.substr(1);
400 // Map this token to an operand. FIXME: Move elsewhere.
403 Idx = CGI.getOperandNamed(OperandName);
405 errs() << "error: unable to find operand: '" << OperandName << "'!\n";
409 const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[Idx];
410 Op.AsClass.Operand = &OI;
412 if (OI.Rec->isSubClassOf("RegisterClass")) {
413 Op.AsClass.ClassName = "Reg";
414 Op.AsClass.PredicateMethod = "isReg";
415 Op.AsClass.RenderMethod = "addRegOperands";
416 } else if (OI.Rec->isSubClassOf("Operand")) {
417 // FIXME: This should not be hard coded.
418 const RecordVal *RV = OI.Rec->getValue("Type");
420 // FIXME: Yet another total hack.
421 if (RV->getValue()->getAsString() == "iPTR" ||
422 OI.Rec->getName() == "lea32mem" ||
423 OI.Rec->getName() == "lea64_32mem") {
424 Op.AsClass.ClassName = "Mem";
425 Op.AsClass.PredicateMethod = "isMem";
426 Op.AsClass.RenderMethod = "addMemOperands";
428 Op.AsClass.ClassName = "Imm";
429 Op.AsClass.PredicateMethod = "isImm";
430 Op.AsClass.RenderMethod = "addImmOperands";
434 assert(0 && "Unexpected instruction operand record!");
437 II->Operands.push_back(Op);
440 // If we broke out, ignore the instruction.
441 if (II->Operands.size() != II->Tokens.size())
444 Infos.push_back(II.take());
448 static void ConstructConversionFunctions(CodeGenTarget &Target,
449 std::vector<InstructionInfo*> &Infos,
451 // Function we have already generated.
452 std::set<std::string> GeneratedFns;
454 for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
455 ie = Infos.end(); it != ie; ++it) {
456 InstructionInfo &II = **it;
458 // Order the (class) operands by the order to convert them into an MCInst.
459 SmallVector<std::pair<unsigned, unsigned>, 4> MIOperandList;
460 for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
461 InstructionInfo::Operand &Op = II.Operands[i];
462 if (Op.Kind == InstructionInfo::Operand::Class)
463 MIOperandList.push_back(std::make_pair(Op.AsClass.Operand->MIOperandNo,
466 std::sort(MIOperandList.begin(), MIOperandList.end());
468 // Compute the total number of operands.
469 unsigned NumMIOperands = 0;
470 for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) {
471 const CodeGenInstruction::OperandInfo &OI = II.Instr->OperandList[i];
472 NumMIOperands = std::max(NumMIOperands,
473 OI.MIOperandNo + OI.MINumOperands);
476 // Build the conversion function signature.
477 std::string Signature = "Convert";
478 unsigned CurIndex = 0;
479 for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
480 InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
481 assert(CurIndex <= Op.AsClass.Operand->MIOperandNo &&
482 "Duplicate match for instruction operand!");
484 // Save the conversion index, for use by the matcher.
485 II.OrderedClassOperands.push_back(MIOperandList[i].second);
487 // Skip operands which weren't matched by anything, this occurs when the
488 // .td file encodes "implicit" operands as explicit ones.
490 // FIXME: This should be removed from the MCInst structure.
491 for (; CurIndex != Op.AsClass.Operand->MIOperandNo; ++CurIndex)
494 Signature += Op.AsClass.ClassName;
495 Signature += utostr(Op.AsClass.Operand->MINumOperands);
496 CurIndex += Op.AsClass.Operand->MINumOperands;
499 // Add any trailing implicit operands.
500 for (; CurIndex != NumMIOperands; ++CurIndex)
503 // Save the conversion function, for use by the matcher.
504 II.ConversionFn = Signature;
506 // Check if we have already generated this function.
507 if (!GeneratedFns.insert(Signature).second)
510 // If not, emit it now.
512 // FIXME: There should be no need to pass the number of operands to fill;
513 // this should always be implicit in the class.
514 OS << "static bool " << Signature << "(MCInst &Inst, unsigned Opcode";
515 for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i)
516 OS << ", " << Target.getName() << "Operand Op" << i;
518 OS << " Inst.setOpcode(Opcode);\n";
520 for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
521 InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
523 // Add the implicit operands.
524 for (; CurIndex != Op.AsClass.Operand->MIOperandNo; ++CurIndex)
525 OS << " Inst.addOperand(MCOperand::CreateReg(0));\n";
527 OS << " Op" << i << "." << Op.AsClass.RenderMethod
528 << "(Inst, " << Op.AsClass.Operand->MINumOperands << ");\n";
529 CurIndex += Op.AsClass.Operand->MINumOperands;
532 // And add trailing implicit operands.
533 for (; CurIndex != NumMIOperands; ++CurIndex)
534 OS << " Inst.addOperand(MCOperand::CreateReg(0));\n";
536 OS << " return false;\n";
541 void AsmMatcherEmitter::run(raw_ostream &OS) {
542 CodeGenTarget Target;
543 const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
544 Record *AsmParser = Target.getAsmParser();
545 std::string ClassName = AsmParser->getValueAsString("AsmParserClassName");
547 std::string Namespace = Registers[0].TheDef->getValueAsString("Namespace");
549 EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
551 // Emit the function to match a register name to number.
553 OS << "bool " << Target.getName() << ClassName
554 << "::MatchRegisterName(const StringRef &Name, unsigned &RegNo) {\n";
556 // FIXME: TableGen should have a fast string matcher generator.
557 for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
558 const CodeGenRegister &Reg = Registers[i];
559 if (Reg.TheDef->getValueAsString("AsmName").empty())
562 OS << " if (Name == \""
563 << Reg.TheDef->getValueAsString("AsmName") << "\")\n"
564 << " return RegNo=" << i + 1 << ", false;\n";
566 OS << " return true;\n";
569 std::vector<InstructionInfo*> Infos;
570 BuildInstructionInfos(Target, Infos);
573 #define DEBUG_TYPE "instruction_info"
575 for (std::vector<InstructionInfo*>::iterator it = Infos.begin(),
576 ie = Infos.end(); it != ie; ++it)
580 #define DEBUG_TYPE ""
582 // FIXME: At this point we should be able to totally order Infos, if not then
583 // we have an ambiguity which the .td file should be forced to resolve.
585 // Generate the terminal actions to convert operands into an MCInst. We still
586 // pass the operands in to these functions individually (as opposed to the
587 // array) so that we do not need to worry about the operand order.
588 ConstructConversionFunctions(Target, Infos, OS);
590 // Build a very stupid version of the match function which just checks each
591 // instruction in order.
593 OS << "bool " << Target.getName() << ClassName
594 << "::MatchInstruction("
595 << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, "
596 << "MCInst &Inst) {\n";
598 for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
599 ie = Infos.end(); it != ie; ++it) {
600 InstructionInfo &II = **it;
602 // The parser is expected to arrange things so that each "token" matches
603 // exactly one target specific operand.
604 OS << " if (Operands.size() == " << II.Operands.size();
605 for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
606 InstructionInfo::Operand &Op = II.Operands[i];
611 if (Op.Kind == InstructionInfo::Operand::Token)
612 OS << "Operands[" << i << "].isToken(\"" << II.Tokens[i] << "\")";
614 OS << "Operands[" << i << "]."
615 << Op.AsClass.PredicateMethod << "()";
618 OS << " return " << II.ConversionFn << "(Inst, "
619 << Target.getName() << "::" << II.InstrName;
620 for (unsigned i = 0, e = II.OrderedClassOperands.size(); i != e; ++i)
621 OS << ", Operands[" << II.OrderedClassOperands[i] << "]";
625 OS << " return true;\n";