1 #include "llvm/Analysis/Passes.h"
2 #include "llvm/ExecutionEngine/ExecutionEngine.h"
3 #include "llvm/ExecutionEngine/MCJIT.h"
4 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
5 #include "llvm/IR/DataLayout.h"
6 #include "llvm/IR/DerivedTypes.h"
7 #include "llvm/IR/IRBuilder.h"
8 #include "llvm/IR/LLVMContext.h"
9 #include "llvm/IR/LegacyPassManager.h"
10 #include "llvm/IR/Module.h"
11 #include "llvm/IR/Verifier.h"
12 #include "llvm/Support/TargetSelect.h"
13 #include "llvm/Transforms/Scalar.h"
21 //===----------------------------------------------------------------------===//
23 //===----------------------------------------------------------------------===//
25 // The lexer returns the character's own code [0-255] for an unknown character,
26 // otherwise one of these values for known things.
39 static std::string IdentifierStr; // Filled in if tok_identifier
40 static double NumVal; // Filled in if tok_number
42 /// gettok - Return the next token from standard input.
44 static int LastChar = ' ';
46 // Skip any whitespace.
47 while (isspace(LastChar))
50 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
51 IdentifierStr = LastChar;
52 while (isalnum((LastChar = getchar())))
53 IdentifierStr += LastChar;
55 if (IdentifierStr == "def")
57 if (IdentifierStr == "extern")
59 return tok_identifier;
62 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
67 } while (isdigit(LastChar) || LastChar == '.');
69 NumVal = strtod(NumStr.c_str(), 0);
73 if (LastChar == '#') {
74 // Comment until end of line.
77 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
83 // Check for end of file. Don't eat the EOF.
87 // Otherwise, just return the character as its ASCII value.
88 int ThisChar = LastChar;
93 //===----------------------------------------------------------------------===//
94 // Abstract Syntax Tree (aka Parse Tree)
95 //===----------------------------------------------------------------------===//
97 /// ExprAST - Base class for all expression nodes.
100 virtual ~ExprAST() {}
101 virtual Value *Codegen() = 0;
104 /// NumberExprAST - Expression class for numeric literals like "1.0".
105 class NumberExprAST : public ExprAST {
109 NumberExprAST(double val) : Val(val) {}
110 Value *Codegen() override;
113 /// VariableExprAST - Expression class for referencing a variable, like "a".
114 class VariableExprAST : public ExprAST {
118 VariableExprAST(const std::string &name) : Name(name) {}
119 Value *Codegen() override;
122 /// BinaryExprAST - Expression class for a binary operator.
123 class BinaryExprAST : public ExprAST {
128 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
129 : Op(op), LHS(lhs), RHS(rhs) {}
130 Value *Codegen() override;
133 /// CallExprAST - Expression class for function calls.
134 class CallExprAST : public ExprAST {
136 std::vector<ExprAST *> Args;
139 CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
140 : Callee(callee), Args(args) {}
141 Value *Codegen() override;
144 /// PrototypeAST - This class represents the "prototype" for a function,
145 /// which captures its name, and its argument names (thus implicitly the number
146 /// of arguments the function takes).
149 std::vector<std::string> Args;
152 PrototypeAST(const std::string &name, const std::vector<std::string> &args)
153 : Name(name), Args(args) {}
158 /// FunctionAST - This class represents a function definition itself.
164 FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {}
168 } // end anonymous namespace
170 //===----------------------------------------------------------------------===//
172 //===----------------------------------------------------------------------===//
174 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
175 /// token the parser is looking at. getNextToken reads another token from the
176 /// lexer and updates CurTok with its results.
/// The return value is the token just stored in CurTok, so callers can test
/// the freshly read token inline (ParsePrototype does this in its loop
/// condition while collecting argument names).
178 static int getNextToken() { return CurTok = gettok(); }
180 /// BinopPrecedence - This holds the precedence for each declared binary operator.
182 static std::map<char, int> BinopPrecedence;
184 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
185 static int GetTokPrecedence() {
186 if (!isascii(CurTok))
189 // Make sure it's a declared binop.
190 int TokPrec = BinopPrecedence[CurTok];
196 /// Error* - These are little helper functions for error handling.
197 ExprAST *Error(const char *Str) {
198 fprintf(stderr, "Error: %s\n", Str);
201 PrototypeAST *ErrorP(const char *Str) {
205 FunctionAST *ErrorF(const char *Str) {
210 static ExprAST *ParseExpression();
214 /// ::= identifier '(' expression* ')'
215 static ExprAST *ParseIdentifierExpr() {
216 std::string IdName = IdentifierStr;
218 getNextToken(); // eat identifier.
220 if (CurTok != '(') // Simple variable ref.
221 return new VariableExprAST(IdName);
224 getNextToken(); // eat (
225 std::vector<ExprAST *> Args;
228 ExprAST *Arg = ParseExpression();
237 return Error("Expected ')' or ',' in argument list");
245 return new CallExprAST(IdName, Args);
248 /// numberexpr ::= number
249 static ExprAST *ParseNumberExpr() {
250 ExprAST *Result = new NumberExprAST(NumVal);
251 getNextToken(); // consume the number
255 /// parenexpr ::= '(' expression ')'
256 static ExprAST *ParseParenExpr() {
257 getNextToken(); // eat (.
258 ExprAST *V = ParseExpression();
263 return Error("expected ')'");
264 getNextToken(); // eat ).
269 /// ::= identifierexpr
272 static ExprAST *ParsePrimary() {
275 return Error("unknown token when expecting an expression");
277 return ParseIdentifierExpr();
279 return ParseNumberExpr();
281 return ParseParenExpr();
286 /// ::= ('+' primary)*
287 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
288 // If this is a binop, find its precedence.
290 int TokPrec = GetTokPrecedence();
292 // If this is a binop that binds at least as tightly as the current binop,
293 // consume it, otherwise we are done.
294 if (TokPrec < ExprPrec)
297 // Okay, we know this is a binop.
299 getNextToken(); // eat binop
301 // Parse the primary expression after the binary operator.
302 ExprAST *RHS = ParsePrimary();
306 // If BinOp binds less tightly with RHS than the operator after RHS, let
307 // the pending operator take RHS as its LHS.
308 int NextPrec = GetTokPrecedence();
309 if (TokPrec < NextPrec) {
310 RHS = ParseBinOpRHS(TokPrec + 1, RHS);
316 LHS = new BinaryExprAST(BinOp, LHS, RHS);
321 /// ::= primary binoprhs
323 static ExprAST *ParseExpression() {
324 ExprAST *LHS = ParsePrimary();
328 return ParseBinOpRHS(0, LHS);
332 /// ::= id '(' id* ')'
333 static PrototypeAST *ParsePrototype() {
334 if (CurTok != tok_identifier)
335 return ErrorP("Expected function name in prototype");
337 std::string FnName = IdentifierStr;
341 return ErrorP("Expected '(' in prototype");
343 std::vector<std::string> ArgNames;
344 while (getNextToken() == tok_identifier)
345 ArgNames.push_back(IdentifierStr);
347 return ErrorP("Expected ')' in prototype");
350 getNextToken(); // eat ')'.
352 return new PrototypeAST(FnName, ArgNames);
355 /// definition ::= 'def' prototype expression
356 static FunctionAST *ParseDefinition() {
357 getNextToken(); // eat def.
358 PrototypeAST *Proto = ParsePrototype();
362 if (ExprAST *E = ParseExpression())
363 return new FunctionAST(Proto, E);
367 /// toplevelexpr ::= expression
368 static FunctionAST *ParseTopLevelExpr() {
369 if (ExprAST *E = ParseExpression()) {
370 // Make an anonymous proto.
371 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
372 return new FunctionAST(Proto, E);
377 /// external ::= 'extern' prototype
/// Consumes the current token, then hands the remainder of the declaration to
/// ParsePrototype and returns its result unchanged.
/// NOTE(review): assumes the caller only dispatches here when CurTok is the
/// 'extern' keyword token -- confirm at the call site.
378 static PrototypeAST *ParseExtern() {
379 getNextToken(); // eat extern.
380 return ParsePrototype();
383 //===----------------------------------------------------------------------===//
384 // Quick and dirty hack
385 //===----------------------------------------------------------------------===//
387 // FIXME: Obviously we can do better than this
388 std::string GenerateUniqueName(const char *root) {
391 sprintf(s, "%s%d", root, i++);
396 std::string MakeLegalFunctionName(std::string Name) {
399 return GenerateUniqueName("anon_func_");
401 // Start with what we have
404 // Look for a numeric first character
405 if (NewName.find_first_of("0123456789") == 0) {
406 NewName.insert(0, 1, 'n');
409 // Replace illegal characters with their decimal character code
410 std::string legal_elements =
411 "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
413 while ((pos = NewName.find_first_not_of(legal_elements)) !=
415 char old_c = NewName.at(pos);
417 sprintf(new_str, "%d", (int)old_c);
418 NewName = NewName.replace(pos, 1, new_str);
424 //===----------------------------------------------------------------------===//
425 // MCJIT helper class
426 //===----------------------------------------------------------------------===//
430 MCJITHelper(LLVMContext &C) : Context(C), OpenModule(NULL) {}
433 Function *getFunction(const std::string FnName);
434 Module *getModuleForNewFunction();
435 void *getPointerToFunction(Function *F);
436 void *getSymbolAddress(const std::string &Name);
440 typedef std::vector<Module *> ModuleVector;
441 typedef std::vector<ExecutionEngine *> EngineVector;
443 LLVMContext &Context;
445 ModuleVector Modules;
446 EngineVector Engines;
449 class HelpingMemoryManager : public SectionMemoryManager {
450 HelpingMemoryManager(const HelpingMemoryManager &) = delete;
451 void operator=(const HelpingMemoryManager &) = delete;
454 HelpingMemoryManager(MCJITHelper *Helper) : MasterHelper(Helper) {}
455 ~HelpingMemoryManager() override {}
457 /// This method returns the address of the specified symbol.
458 /// Our implementation will attempt to find symbols in other
459 /// modules associated with the MCJITHelper to cross link symbols
460 /// from one generated module to another.
461 uint64_t getSymbolAddress(const std::string &Name) override;
464 MCJITHelper *MasterHelper;
467 uint64_t HelpingMemoryManager::getSymbolAddress(const std::string &Name) {
468 uint64_t FnAddr = SectionMemoryManager::getSymbolAddress(Name);
472 uint64_t HelperFun = (uint64_t)MasterHelper->getSymbolAddress(Name);
474 report_fatal_error("Program used extern function '" + Name +
475 "' which could not be resolved!");
480 MCJITHelper::~MCJITHelper() {
483 EngineVector::iterator begin = Engines.begin();
484 EngineVector::iterator end = Engines.end();
485 EngineVector::iterator it;
486 for (it = begin; it != end; ++it)
490 Function *MCJITHelper::getFunction(const std::string FnName) {
491 ModuleVector::iterator begin = Modules.begin();
492 ModuleVector::iterator end = Modules.end();
493 ModuleVector::iterator it;
494 for (it = begin; it != end; ++it) {
495 Function *F = (*it)->getFunction(FnName);
497 if (*it == OpenModule)
500 assert(OpenModule != NULL);
502 // This function is in a module that has already been JITed.
503 // We need to generate a new prototype for external linkage.
504 Function *PF = OpenModule->getFunction(FnName);
505 if (PF && !PF->empty()) {
506 ErrorF("redefinition of function across modules");
510 // If we don't have a prototype yet, create one.
512 PF = Function::Create(F->getFunctionType(), Function::ExternalLinkage,
520 Module *MCJITHelper::getModuleForNewFunction() {
521 // If we have a Module that hasn't been JITed, use that.
525 // Otherwise create a new Module.
526 std::string ModName = GenerateUniqueName("mcjit_module_");
527 Module *M = new Module(ModName, Context);
528 Modules.push_back(M);
533 void *MCJITHelper::getPointerToFunction(Function *F) {
534 // See if an existing instance of MCJIT has this function.
535 EngineVector::iterator begin = Engines.begin();
536 EngineVector::iterator end = Engines.end();
537 EngineVector::iterator it;
538 for (it = begin; it != end; ++it) {
539 void *P = (*it)->getPointerToFunction(F);
544 // If we didn't find the function, see if we can generate it.
547 ExecutionEngine *NewEngine =
548 EngineBuilder(std::unique_ptr<Module>(OpenModule))
549 .setErrorStr(&ErrStr)
550 .setMCJITMemoryManager(std::unique_ptr<HelpingMemoryManager>(
551 new HelpingMemoryManager(this)))
554 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
558 // Create a function pass manager for this engine
559 auto *FPM = new legacy::FunctionPassManager(OpenModule);
561 // Set up the optimizer pipeline. Start with registering info about how the
562 // target lays out data structures.
563 OpenModule->setDataLayout(*NewEngine->getDataLayout());
564 // Provide basic AliasAnalysis support for GVN.
565 FPM->add(createBasicAliasAnalysisPass());
566 // Promote allocas to registers.
567 FPM->add(createPromoteMemoryToRegisterPass());
568 // Do simple "peephole" optimizations and bit-twiddling optimizations.
569 FPM->add(createInstructionCombiningPass());
570 // Reassociate expressions.
571 FPM->add(createReassociatePass());
572 // Eliminate Common SubExpressions.
573 FPM->add(createGVNPass());
574 // Simplify the control flow graph (deleting unreachable blocks, etc).
575 FPM->add(createCFGSimplificationPass());
576 FPM->doInitialization();
578 // For each function in the module
580 Module::iterator end = OpenModule->end();
581 for (it = OpenModule->begin(); it != end; ++it) {
582 // Run the FPM on this function
586 // We don't need this anymore
590 Engines.push_back(NewEngine);
591 NewEngine->finalizeObject();
592 return NewEngine->getPointerToFunction(F);
597 void *MCJITHelper::getSymbolAddress(const std::string &Name) {
598 // Look for the symbol in each of our execution engines.
599 EngineVector::iterator begin = Engines.begin();
600 EngineVector::iterator end = Engines.end();
601 EngineVector::iterator it;
602 for (it = begin; it != end; ++it) {
603 uint64_t FAddr = (*it)->getFunctionAddress(Name);
605 return (void *)FAddr;
611 void MCJITHelper::dump() {
612 ModuleVector::iterator begin = Modules.begin();
613 ModuleVector::iterator end = Modules.end();
614 ModuleVector::iterator it;
615 for (it = begin; it != end; ++it)
618 //===----------------------------------------------------------------------===//
620 //===----------------------------------------------------------------------===//
622 static MCJITHelper *JITHelper;
623 static IRBuilder<> Builder(getGlobalContext());
624 static std::map<std::string, Value *> NamedValues;
626 Value *ErrorV(const char *Str) {
/// Codegen - Emit the numeric literal as a floating-point constant: Val is
/// wrapped in an APFloat and the matching ConstantFP is requested from the
/// global LLVM context.
631 Value *NumberExprAST::Codegen() {
632 return ConstantFP::get(getGlobalContext(), APFloat(Val));
/// Codegen - Resolve a variable reference. Returns the Value stored under Name
/// in NamedValues, or the result of ErrorV("Unknown variable name") when the
/// stored pointer is null.
/// NOTE(review): NamedValues is a std::map, so the operator[] lookup below
/// inserts a null entry for every name that was not already present. A
/// find()-based lookup would avoid growing the table on failed lookups.
635 Value *VariableExprAST::Codegen() {
636 // Look this variable up in the function.
637 Value *V = NamedValues[Name];
638 return V ? V : ErrorV("Unknown variable name");
641 Value *BinaryExprAST::Codegen() {
642 Value *L = LHS->Codegen();
643 Value *R = RHS->Codegen();
644 if (L == 0 || R == 0)
649 return Builder.CreateFAdd(L, R, "addtmp");
651 return Builder.CreateFSub(L, R, "subtmp");
653 return Builder.CreateFMul(L, R, "multmp");
655 L = Builder.CreateFCmpULT(L, R, "cmptmp");
656 // Convert bool 0/1 to double 0.0 or 1.0
657 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
660 return ErrorV("invalid binary operator");
664 Value *CallExprAST::Codegen() {
665 // Look up the name in the global module table.
666 Function *CalleeF = JITHelper->getFunction(Callee);
668 return ErrorV("Unknown function referenced");
670 // Reject the call if the argument count does not match the callee.
671 if (CalleeF->arg_size() != Args.size())
672 return ErrorV("Incorrect # arguments passed");
674 std::vector<Value *> ArgsV;
675 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
676 ArgsV.push_back(Args[i]->Codegen());
677 if (ArgsV.back() == 0)
681 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
684 Function *PrototypeAST::Codegen() {
685 // Make the function type: double(double,double) etc.
686 std::vector<Type *> Doubles(Args.size(),
687 Type::getDoubleTy(getGlobalContext()));
689 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
691 std::string FnName = MakeLegalFunctionName(Name);
693 Module *M = JITHelper->getModuleForNewFunction();
695 Function *F = Function::Create(FT, Function::ExternalLinkage, FnName, M);
697 // If F conflicted, there was already something named 'Name'. If it has a
698 // body, don't allow redefinition or reextern.
699 if (F->getName() != FnName) {
700 // Delete the one we just made and get the existing one.
701 F->eraseFromParent();
702 F = JITHelper->getFunction(Name);
703 // If F already has a body, reject this.
705 ErrorF("redefinition of function");
709 // If F took a different number of args, reject.
710 if (F->arg_size() != Args.size()) {
711 ErrorF("redefinition of function with different # args");
716 // Set names for all arguments.
718 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
720 AI->setName(Args[Idx]);
722 // Add arguments to variable symbol table.
723 NamedValues[Args[Idx]] = AI;
729 Function *FunctionAST::Codegen() {
732 Function *TheFunction = Proto->Codegen();
733 if (TheFunction == 0)
736 // Create a new basic block to start insertion into.
737 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
738 Builder.SetInsertPoint(BB);
740 if (Value *RetVal = Body->Codegen()) {
741 // Finish off the function.
742 Builder.CreateRet(RetVal);
744 // Validate the generated code, checking for consistency.
745 verifyFunction(*TheFunction);
750 // Error reading body, remove function.
751 TheFunction->eraseFromParent();
755 //===----------------------------------------------------------------------===//
756 // Top-Level parsing and JIT Driver
757 //===----------------------------------------------------------------------===//
759 static void HandleDefinition() {
760 if (FunctionAST *F = ParseDefinition()) {
761 if (Function *LF = F->Codegen()) {
762 fprintf(stderr, "Read function definition:");
766 // Skip token for error recovery.
771 static void HandleExtern() {
772 if (PrototypeAST *P = ParseExtern()) {
773 if (Function *F = P->Codegen()) {
774 fprintf(stderr, "Read extern: ");
778 // Skip token for error recovery.
783 static void HandleTopLevelExpression() {
784 // Evaluate a top-level expression into an anonymous function.
785 if (FunctionAST *F = ParseTopLevelExpr()) {
786 if (Function *LF = F->Codegen()) {
787 // JIT the function, returning a function pointer.
788 void *FPtr = JITHelper->getPointerToFunction(LF);
790 // Cast it to the right type (takes no arguments, returns a double) so we
791 // can call it as a native function.
792 double (*FP)() = (double (*)())(intptr_t)FPtr;
793 fprintf(stderr, "Evaluated to %f\n", FP());
796 // Skip token for error recovery.
801 /// top ::= definition | external | expression | ';'
802 static void MainLoop() {
804 fprintf(stderr, "ready> ");
810 break; // ignore top-level semicolons.
818 HandleTopLevelExpression();
824 //===----------------------------------------------------------------------===//
825 // "Library" functions that can be "extern'd" from user code.
826 //===----------------------------------------------------------------------===//
828 /// putchard - putchar that takes a double and returns 0.
829 extern "C" double putchard(double X) {
834 //===----------------------------------------------------------------------===//
836 //===----------------------------------------------------------------------===//
839 InitializeNativeTarget();
840 InitializeNativeTargetAsmPrinter();
841 InitializeNativeTargetAsmParser();
842 LLVMContext &Context = getGlobalContext();
843 JITHelper = new MCJITHelper(Context);
845 // Install standard binary operators.
846 // 1 is lowest precedence.
847 BinopPrecedence['<'] = 10;
848 BinopPrecedence['+'] = 20;
849 BinopPrecedence['-'] = 20;
850 BinopPrecedence['*'] = 40; // highest.
852 // Prime the first token.
853 fprintf(stderr, "ready> ");
856 // Run the main "interpreter loop" now.
859 // Print out all of the generated code.