1 #include "llvm/ADT/STLExtras.h"
2 #include "llvm/Analysis/BasicAliasAnalysis.h"
3 #include "llvm/Analysis/Passes.h"
4 #include "llvm/ExecutionEngine/ExecutionEngine.h"
5 #include "llvm/ExecutionEngine/MCJIT.h"
6 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
7 #include "llvm/IR/DataLayout.h"
8 #include "llvm/IR/DerivedTypes.h"
9 #include "llvm/IR/IRBuilder.h"
10 #include "llvm/IR/LLVMContext.h"
11 #include "llvm/IR/LegacyPassManager.h"
12 #include "llvm/IR/Module.h"
13 #include "llvm/IR/Verifier.h"
14 #include "llvm/Support/TargetSelect.h"
15 #include "llvm/Transforms/Scalar.h"
23 //===----------------------------------------------------------------------===//
25 //===----------------------------------------------------------------------===//
27 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
28 // of these for known things.
48 static std::string IdentifierStr; // Filled in if tok_identifier: text of the identifier just lexed.
49 static double NumVal; // Filled in if tok_number: literal value as converted by strtod.
51 /// gettok - Return the next token from standard input.
/// Characters are read one at a time with getchar(). LastChar is the single
/// character of lookahead; it has static storage, so its value carries over
/// to the next time the lexer runs.
53 static int LastChar = ' ';
55 // Skip any whitespace.
56 while (isspace(LastChar))
59 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
60 IdentifierStr = LastChar;
61 while (isalnum((LastChar = getchar())))
62 IdentifierStr += LastChar;
// Reserved words are recognised by exact comparison with the collected
// identifier text; text matching none of them yields tok_identifier.
// NOTE(review): the statement guarded by each comparison is not visible in
// this listing -- presumably each returns the matching keyword token.
64 if (IdentifierStr == "def")
66 if (IdentifierStr == "extern")
68 if (IdentifierStr == "if")
70 if (IdentifierStr == "then")
72 if (IdentifierStr == "else")
74 if (IdentifierStr == "for")
76 if (IdentifierStr == "in")
78 return tok_identifier;
// NOTE(review): the character class below admits any mix of digits and dots,
// so input such as "1.2.3" appears to be collected as a single literal, and
// strtod is called with a null end pointer, so a partially converted literal
// is not detected -- confirm whether that is acceptable.
81 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
86 } while (isdigit(LastChar) || LastChar == '.');
88 NumVal = strtod(NumStr.c_str(), 0);
92 if (LastChar == '#') {
93 // Comment until end of line.
// The skip loop ends at EOF, '\n' or '\r', whichever comes first.
96 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
102 // Check for end of file. Don't eat the EOF.
106 // Otherwise, just return the character as its ascii value.
// The current character is saved in ThisChar before the lookahead advances.
107 int ThisChar = LastChar;
108 LastChar = getchar();
112 //===----------------------------------------------------------------------===//
113 // Abstract Syntax Tree (aka Parse Tree)
114 //===----------------------------------------------------------------------===//
116 /// ExprAST - Base class for all expression nodes.
/// Each concrete expression node in this file overrides Codegen(), which
/// yields a Value pointer for the expression. Callers in this file compare
/// that pointer against 0 to detect a failed code generation.
119 virtual ~ExprAST() {}
120 virtual Value *Codegen() = 0;
123 /// NumberExprAST - Expression class for numeric literals like "1.0".
124 class NumberExprAST : public ExprAST {
// The constructor stores the literal in Val.
// NOTE(review): the constructor is not marked explicit, so a double converts
// implicitly to a NumberExprAST.
128 NumberExprAST(double val) : Val(val) {}
129 Value *Codegen() override;
132 /// VariableExprAST - Expression class for referencing a variable, like "a".
133 class VariableExprAST : public ExprAST {
// The constructor copies the referenced variable's name into Name.
// NOTE(review): the constructor is not marked explicit, so a std::string
// converts implicitly to a VariableExprAST.
137 VariableExprAST(const std::string &name) : Name(name) {}
138 Value *Codegen() override;
141 /// BinaryExprAST - Expression class for a binary operator.
142 class BinaryExprAST : public ExprAST {
// Op is the operator character; LHS and RHS are the operand subtrees.
// NOTE(review): the operands are raw pointers and no destructor is visible
// in this listing -- confirm who is expected to free them.
147 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
148 : Op(op), LHS(lhs), RHS(rhs) {}
149 Value *Codegen() override;
152 /// CallExprAST - Expression class for function calls.
153 class CallExprAST : public ExprAST {
// Argument expressions, in call order.
155 std::vector<ExprAST *> Args;
// The argument vector is received by non-const reference but copied into
// Args, so the caller's vector is left unchanged.
158 CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
159 : Callee(callee), Args(args) {}
160 Value *Codegen() override;
163 /// IfExprAST - Expression class for if/then/else.
164 class IfExprAST : public ExprAST {
// Condition, then-branch and else-branch subtrees; the parser in this file
// always supplies all three.
165 ExprAST *Cond, *Then, *Else;
168 IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
169 : Cond(cond), Then(then), Else(_else) {}
170 Value *Codegen() override;
173 /// ForExprAST - Expression class for for/in.
174 class ForExprAST : public ExprAST {
// Start, End and Body are the mandatory parts of the loop. Step is described
// as optional by the parser and by code generation, which substitutes 1.0
// when it is not specified.
// NOTE(review): presumably an omitted step is represented by a null Step
// pointer -- the check itself is not visible in this listing.
176 ExprAST *Start, *End, *Step, *Body;
179 ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
180 ExprAST *step, ExprAST *body)
181 : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
182 Value *Codegen() override;
185 /// PrototypeAST - This class represents the "prototype" for a function,
186 /// which captures its name, and its argument names (thus implicitly the number
187 /// of arguments the function takes).
// Formal parameter names, in declaration order.
190 std::vector<std::string> Args;
// Both the name and the argument list are copied into the node.
193 PrototypeAST(const std::string &name, const std::vector<std::string> &args)
194 : Name(name), Args(args) {}
199 /// FunctionAST - This class represents a function definition itself.
/// It pairs a prototype with the expression that forms the function body.
// NOTE(review): both parts are stored as raw pointers and no destructor is
// visible in this listing -- confirm the intended ownership.
205 FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {}
209 } // end anonymous namespace
211 //===----------------------------------------------------------------------===//
213 //===----------------------------------------------------------------------===//
215 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
216 /// token the parser is looking at. getNextToken reads another token from the
217 /// lexer and updates CurTok with its results.
/// The freshly stored CurTok value is also returned, which lets a caller test
/// the new token in the same expression that advances the lexer.
219 static int getNextToken() { return CurTok = gettok(); }
221 /// BinopPrecedence - This holds the precedence for each binary operator that is
/// defined, keyed by the operator character. The entries for '<', '+', '-'
/// and '*' are installed by the driver code at startup.
223 static std::map<char, int> BinopPrecedence;
225 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
226 static int GetTokPrecedence() {
// Token values outside the ASCII range are tested for first, before the
// table is consulted (the guarded statement is not visible in this listing).
227 if (!isascii(CurTok))
230 // Make sure it's a declared binop.
// NOTE(review): std::map::operator[] default-inserts a zero entry for a key
// that is not present, so looking up an undeclared operator adds it to
// BinopPrecedence; a find()-based lookup would leave the table untouched.
231 int TokPrec = BinopPrecedence[CurTok];
237 /// Error* - These are little helper functions for error handling.
/// Error writes "Error: <Str>" and a newline to stderr. ErrorP and ErrorF
/// accept the same message but are typed to return PrototypeAST* and
/// FunctionAST*, so parsers can write `return ErrorX("...")` directly.
// NOTE(review): the return statements are not visible in this listing;
// presumably all three return a null pointer.
238 ExprAST *Error(const char *Str) {
239 fprintf(stderr, "Error: %s\n", Str);
242 PrototypeAST *ErrorP(const char *Str) {
246 FunctionAST *ErrorF(const char *Str) {
251 static ExprAST *ParseExpression();
255 /// ::= identifier '(' expression* ')'
/// Produces a VariableExprAST for a bare identifier, or a CallExprAST when
/// the identifier is followed by '('.
256 static ExprAST *ParseIdentifierExpr() {
// Copy the name first: IdentifierStr is the lexer's global buffer and is
// overwritten when the next identifier is lexed.
257 std::string IdName = IdentifierStr;
259 getNextToken(); // eat identifier.
261 if (CurTok != '(') // Simple variable ref.
262 return new VariableExprAST(IdName);
265 getNextToken(); // eat (
266 std::vector<ExprAST *> Args;
269 ExprAST *Arg = ParseExpression();
// NOTE(review): argument nodes are raw pointers; the visible error return
// below does not release arguments that were already parsed -- confirm
// whether that leak is accepted here.
278 return Error("Expected ')' or ',' in argument list");
286 return new CallExprAST(IdName, Args);
289 /// numberexpr ::= number
/// Wraps the lexer's current NumVal in a new NumberExprAST and then advances
/// past the number token.
290 static ExprAST *ParseNumberExpr() {
// NumVal must be read before getNextToken(), which may overwrite it.
291 ExprAST *Result = new NumberExprAST(NumVal);
292 getNextToken(); // consume the number
296 /// parenexpr ::= '(' expression ')'
/// Parses the expression between the parentheses into V; a missing closing
/// parenthesis is reported through Error("expected ')'").
297 static ExprAST *ParseParenExpr() {
298 getNextToken(); // eat (.
299 ExprAST *V = ParseExpression();
304 return Error("expected ')'");
305 getNextToken(); // eat ).
309 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
/// Parses the three sub-expressions in source order and combines them into
/// an IfExprAST. Both the 'then' and the 'else' keyword are mandatory.
310 static ExprAST *ParseIfExpr() {
311 getNextToken(); // eat the if.
314 ExprAST *Cond = ParseExpression();
// NOTE(review): on the error returns below, sub-expressions that were
// already parsed are not released in the visible code.
318 if (CurTok != tok_then)
319 return Error("expected then");
320 getNextToken(); // eat the then
322 ExprAST *Then = ParseExpression();
326 if (CurTok != tok_else)
327 return Error("expected else");
331 ExprAST *Else = ParseExpression();
335 return new IfExprAST(Cond, Then, Else);
338 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
/// Parses the loop variable, start value, end condition, optional step and
/// body, and combines them into a ForExprAST.
339 static ExprAST *ParseForExpr() {
340 getNextToken(); // eat the for.
342 if (CurTok != tok_identifier)
343 return Error("expected identifier after for");
// Copy the loop variable's name before the lexer buffer is reused.
345 std::string IdName = IdentifierStr;
346 getNextToken(); // eat identifier.
349 return Error("expected '=' after for");
350 getNextToken(); // eat '='.
352 ExprAST *Start = ParseExpression();
356 return Error("expected ',' after for start value");
359 ExprAST *End = ParseExpression();
363 // The step value is optional.
// NOTE(review): the declaration and default of Step are not visible in this
// listing; presumably it stays null when no step is written.
367 Step = ParseExpression();
372 if (CurTok != tok_in)
373 return Error("expected 'in' after for");
374 getNextToken(); // eat 'in'.
376 ExprAST *Body = ParseExpression();
380 return new ForExprAST(IdName, Start, End, Step, Body);
384 /// ::= identifierexpr
/// Hands off to the parser for identifier, number, parenthesised, if and for
/// expressions; the Error() return reports a token that starts none of them.
// NOTE(review): the switch and its case labels are not visible in this
// listing, so the token-to-parser mapping is inferred from the callee names.
389 static ExprAST *ParsePrimary() {
392 return Error("unknown token when expecting an expression");
394 return ParseIdentifierExpr();
396 return ParseNumberExpr();
398 return ParseParenExpr();
400 return ParseIfExpr();
402 return ParseForExpr();
407 /// ::= ('+' primary)*
/// ExprPrec is the lowest operator precedence this call is allowed to
/// consume; LHS is the expression parsed so far.
408 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
409 // If this is a binop, find its precedence.
411 int TokPrec = GetTokPrecedence();
413 // If this is a binop that binds at least as tightly as the current binop,
414 // consume it, otherwise we are done.
415 if (TokPrec < ExprPrec)
418 // Okay, we know this is a binop.
420 getNextToken(); // eat binop
422 // Parse the primary expression after the binary operator.
423 ExprAST *RHS = ParsePrimary();
427 // If BinOp binds less tightly with RHS than the operator after RHS, let
428 // the pending operator take RHS as its LHS.
429 int NextPrec = GetTokPrecedence();
430 if (TokPrec < NextPrec) {
// Recursing with TokPrec + 1 means the nested call stops at any operator
// whose precedence is not strictly higher than the current one, so
// operators of equal precedence are combined by this call instead.
431 RHS = ParseBinOpRHS(TokPrec + 1, RHS);
// Fold the operator and both operands into a new left-hand side.
437 LHS = new BinaryExprAST(BinOp, LHS, RHS);
442 /// ::= primary binoprhs
/// Parses one primary expression, then lets ParseBinOpRHS attach any binary
/// operators that follow, starting from a minimum precedence of 0.
444 static ExprAST *ParseExpression() {
445 ExprAST *LHS = ParsePrimary();
449 return ParseBinOpRHS(0, LHS);
453 /// ::= id '(' id* ')'
/// Parses a function name followed by a parenthesised list of parameter
/// names and returns them as a PrototypeAST.
454 static PrototypeAST *ParsePrototype() {
455 if (CurTok != tok_identifier)
456 return ErrorP("Expected function name in prototype");
458 std::string FnName = IdentifierStr;
462 return ErrorP("Expected '(' in prototype");
464 std::vector<std::string> ArgNames;
// Parameter names are a plain run of identifiers: the loop keeps reading
// while the next token is an identifier and consumes no separator tokens.
465 while (getNextToken() == tok_identifier)
466 ArgNames.push_back(IdentifierStr);
468 return ErrorP("Expected ')' in prototype");
471 getNextToken(); // eat ')'.
473 return new PrototypeAST(FnName, ArgNames);
476 /// definition ::= 'def' prototype expression
/// Parses the prototype and then the body expression, combining them into a
/// FunctionAST when the body parses successfully.
477 static FunctionAST *ParseDefinition() {
478 getNextToken(); // eat def.
479 PrototypeAST *Proto = ParsePrototype();
// NOTE(review): if the body fails to parse, Proto is not released in the
// visible code.
483 if (ExprAST *E = ParseExpression())
484 return new FunctionAST(Proto, E);
488 /// toplevelexpr ::= expression
/// Wraps a bare expression in a FunctionAST whose prototype has an empty
/// name and no parameters, so it can go through the same code generation
/// path as a user-defined function.
489 static FunctionAST *ParseTopLevelExpr() {
490 if (ExprAST *E = ParseExpression()) {
491 // Make an anonymous proto.
492 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
493 return new FunctionAST(Proto, E);
498 /// external ::= 'extern' prototype
/// Skips the 'extern' keyword and returns whatever ParsePrototype produces;
/// an extern declaration has no body.
499 static PrototypeAST *ParseExtern() {
500 getNextToken(); // eat extern.
501 return ParsePrototype();
504 //===----------------------------------------------------------------------===//
506 //===----------------------------------------------------------------------===//
// Module that receives every generated function; assigned by the driver
// code from the module object it creates.
508 static Module *TheModule;
// Instruction builder shared by all Codegen() methods, bound to the global
// LLVM context.
509 static IRBuilder<> Builder(getGlobalContext());
// Symbol table mapping a variable name to the Value currently bound to it;
// written for function arguments and for the loop variable of a for
// expression.
510 static std::map<std::string, Value *> NamedValues;
// Per-function optimisation pipeline, run on each function after it has been
// generated.
511 static legacy::FunctionPassManager *TheFPM;
513 Value *ErrorV(const char *Str) {
// Emits the literal as a floating-point constant built from Val in the
// global LLVM context.
518 Value *NumberExprAST::Codegen() {
519 return ConstantFP::get(getGlobalContext(), APFloat(Val));
// Resolves the variable through the NamedValues symbol table and reports
// "Unknown variable name" through ErrorV when no Value is bound to it.
522 Value *VariableExprAST::Codegen() {
523 // Look this variable up in the function.
// NOTE(review): std::map::operator[] inserts a null entry when Name is not
// present, so every failed lookup leaves a stale key in NamedValues; a
// find()-based lookup would avoid that.
524 Value *V = NamedValues[Name];
525 return V ? V : ErrorV("Unknown variable name");
// Generates both operands, then emits the floating-point instruction for the
// operator: an add, subtract, multiply or less-than comparison. Any other
// operator is reported as "invalid binary operator".
// NOTE(review): the switch and case labels are not visible in this listing;
// the operator-to-instruction mapping is inferred from the builder calls.
528 Value *BinaryExprAST::Codegen() {
// Both sides are generated before either result is checked.
529 Value *L = LHS->Codegen();
530 Value *R = RHS->Codegen();
531 if (L == 0 || R == 0)
536 return Builder.CreateFAdd(L, R, "addtmp");
538 return Builder.CreateFSub(L, R, "subtmp");
540 return Builder.CreateFMul(L, R, "multmp");
// ULT is LLVM's "unordered or less than" predicate, so the comparison is
// also true when either operand is NaN.
542 L = Builder.CreateFCmpULT(L, R, "cmptmp");
543 // Convert bool 0/1 to double 0.0 or 1.0
544 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
547 return ErrorV("invalid binary operator");
// Emits a call to a function that already exists in TheModule. The callee
// must be known and must take exactly as many parameters as the call
// supplies; otherwise an error is reported through ErrorV.
551 Value *CallExprAST::Codegen() {
552 // Look up the name in the global module table.
553 Function *CalleeF = TheModule->getFunction(Callee);
555 return ErrorV("Unknown function referenced");
557 // If argument mismatch error.
558 if (CalleeF->arg_size() != Args.size())
559 return ErrorV("Incorrect # arguments passed");
// Arguments are generated left to right; each result is checked for null
// right after it is appended.
561 std::vector<Value *> ArgsV;
562 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
563 ArgsV.push_back(Args[i]->Codegen());
564 if (ArgsV.back() == 0)
568 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
// Lowers if/then/else to a conditional branch over two blocks that both jump
// to a merge block, where a two-input PHI node selects the value of the arm
// that was executed.
571 Value *IfExprAST::Codegen() {
572 Value *CondV = Cond->Codegen();
576 // Convert condition to a bool by comparing non-equal to 0.0.
// (FCmpONE is the ordered not-equal predicate: any non-zero, non-NaN
// condition value selects the 'then' arm.)
577 CondV = Builder.CreateFCmpONE(
578 CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");
580 Function *TheFunction = Builder.GetInsertBlock()->getParent();
582 // Create blocks for the then and else cases. Insert the 'then' block at the
583 // end of the function.
// Only 'then' is attached to the function on creation; 'else' and 'ifcont'
// are created detached and appended later, so the blocks end up in source
// order even when an arm creates blocks of its own.
585 BasicBlock::Create(getGlobalContext(), "then", TheFunction);
586 BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
587 BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
589 Builder.CreateCondBr(CondV, ThenBB, ElseBB);
592 Builder.SetInsertPoint(ThenBB);
594 Value *ThenV = Then->Codegen();
598 Builder.CreateBr(MergeBB);
599 // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
600 ThenBB = Builder.GetInsertBlock();
603 TheFunction->getBasicBlockList().push_back(ElseBB);
604 Builder.SetInsertPoint(ElseBB);
606 Value *ElseV = Else->Codegen();
610 Builder.CreateBr(MergeBB);
611 // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
612 ElseBB = Builder.GetInsertBlock();
615 TheFunction->getBasicBlockList().push_back(MergeBB);
616 Builder.SetInsertPoint(MergeBB);
// The PHI receives one incoming value per arm, paired with the block in
// which that arm finished.
618 Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");
620 PN->addIncoming(ThenV, ThenBB);
621 PN->addIncoming(ElseV, ElseBB);
// Lowers a for/in expression to a single loop block headed by a PHI node for
// the loop variable. The preheader branches into the loop unconditionally and
// the end condition is evaluated only after the body and the step, so the
// body is executed at least once. The expression itself evaluates to 0.0.
625 Value *ForExprAST::Codegen() {
631 // variable = phi [start, loopheader], [nextvariable, loopend]
637 // nextvariable = variable + step
639 // br endcond, loop, endloop
642 // Emit the start code first, without 'variable' in scope.
643 Value *StartVal = Start->Codegen();
647 // Make the new basic block for the loop header, inserting after current
// block.
649 Function *TheFunction = Builder.GetInsertBlock()->getParent();
650 BasicBlock *PreheaderBB = Builder.GetInsertBlock();
652 BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
654 // Insert an explicit fall through from the current block to the LoopBB.
655 Builder.CreateBr(LoopBB);
657 // Start insertion in LoopBB.
658 Builder.SetInsertPoint(LoopBB);
660 // Start the PHI node with an entry for Start.
661 PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
663 Variable->addIncoming(StartVal, PreheaderBB);
665 // Within the loop, the variable is defined equal to the PHI node. If it
666 // shadows an existing variable, we have to restore it, so save it now.
// operator[] yields null when VarName was not bound before; that null is
// what later distinguishes "restore" from "erase".
667 Value *OldVal = NamedValues[VarName];
668 NamedValues[VarName] = Variable;
670 // Emit the body of the loop. This, like any other expr, can change the
671 // current BB. Note that we ignore the value computed by the body, but don't
// let a failed body go unnoticed.
673 if (Body->Codegen() == 0)
676 // Emit the step value.
679 StepVal = Step->Codegen();
683 // If not specified, use 1.0.
684 StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
687 Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
689 // Compute the end condition.
// The loop variable is still bound to the PHI here, so the condition sees
// the current iteration's value rather than NextVar.
690 Value *EndCond = End->Codegen();
694 // Convert condition to a bool by comparing non-equal to 0.0.
695 EndCond = Builder.CreateFCmpONE(
696 EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");
698 // Create the "after loop" block and insert it.
// LoopEndBB is re-read from the builder because the body may have moved the
// insertion point to a different block than LoopBB.
699 BasicBlock *LoopEndBB = Builder.GetInsertBlock();
700 BasicBlock *AfterBB =
701 BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
703 // Insert the conditional branch into the end of LoopEndBB.
704 Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
706 // Any new code will be inserted in AfterBB.
707 Builder.SetInsertPoint(AfterBB);
709 // Add a new entry to the PHI node for the backedge.
710 Variable->addIncoming(NextVar, LoopEndBB);
712 // Restore the unshadowed variable.
714 NamedValues[VarName] = OldVal;
716 NamedValues.erase(VarName);
718 // for expr always returns 0.0.
719 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
// Creates (or re-uses) the LLVM function declared by this prototype. Every
// parameter and the return value have type double. The parameters are named
// after Args and registered in NamedValues.
722 Function *PrototypeAST::Codegen() {
723 // Make the function type: double(double,double) etc.
724 std::vector<Type *> Doubles(Args.size(),
725 Type::getDoubleTy(getGlobalContext()));
727 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
730 Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
732 // If F conflicted, there was already something named 'Name'. If it has a
733 // body, don't allow redefinition or reextern.
// A name that differs from the requested one is what signals the conflict.
734 if (F->getName() != Name) {
735 // Delete the one we just made and get the existing one.
736 F->eraseFromParent();
737 F = TheModule->getFunction(Name);
739 // If F already has a body, reject this.
// NOTE(review): the result of ErrorF is not used on the visible line, so the
// call serves only to print the message.
741 ErrorF("redefinition of function");
745 // If F took a different number of args, reject.
746 if (F->arg_size() != Args.size()) {
747 ErrorF("redefinition of function with different # args");
752 // Set names for all arguments.
754 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
756 AI->setName(Args[Idx]);
758 // Add arguments to variable symbol table.
// A parameter name that occurs twice overwrites the earlier binding.
759 NamedValues[Args[Idx]] = AI;
// Generates the function for this definition: creates it from the prototype,
// emits the body into a fresh "entry" block, returns the body's value, then
// verifies and optimises the result. If the body fails to generate, the
// function is removed from its module again.
765 Function *FunctionAST::Codegen() {
768 Function *TheFunction = Proto->Codegen();
769 if (TheFunction == 0)
772 // Create a new basic block to start insertion into.
773 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
774 Builder.SetInsertPoint(BB);
776 if (Value *RetVal = Body->Codegen()) {
777 // Finish off the function.
778 Builder.CreateRet(RetVal);
780 // Validate the generated code, checking for consistency.
// NOTE(review): the result of verifyFunction is discarded, so a failed
// verification does not stop the optimisation step that follows.
781 verifyFunction(*TheFunction);
783 // Optimize the function.
784 TheFPM->run(*TheFunction);
789 // Error reading body, remove function.
790 TheFunction->eraseFromParent();
794 //===----------------------------------------------------------------------===//
795 // Top-Level parsing and JIT Driver
796 //===----------------------------------------------------------------------===//
// JIT engine used by HandleTopLevelExpression to finalize generated code and
// obtain a callable pointer; the driver code checks it for null after
// building it.
798 static ExecutionEngine *TheExecutionEngine;
// Parses a 'def' and, when both parsing and code generation succeed, prints
// a confirmation to stderr.
// NOTE(review): the FunctionAST returned by the parser is not released in
// the visible code.
800 static void HandleDefinition() {
801 if (FunctionAST *F = ParseDefinition()) {
802 if (Function *LF = F->Codegen()) {
803 fprintf(stderr, "Read function definition:");
807 // Skip token for error recovery.
// Parses an 'extern' declaration and, when both parsing and code generation
// succeed, prints a confirmation to stderr.
// NOTE(review): the PrototypeAST returned by the parser is not released in
// the visible code.
812 static void HandleExtern() {
813 if (PrototypeAST *P = ParseExtern()) {
814 if (Function *F = P->Codegen()) {
815 fprintf(stderr, "Read extern: ");
819 // Skip token for error recovery.
// Compiles a top-level expression as an anonymous function, runs it through
// the JIT and prints the resulting double to stderr.
824 static void HandleTopLevelExpression() {
825 // Evaluate a top-level expression into an anonymous function.
826 if (FunctionAST *F = ParseTopLevelExpr()) {
827 if (Function *LF = F->Codegen()) {
// finalizeObject() is called before the function's address is requested.
828 TheExecutionEngine->finalizeObject();
829 // JIT the function, returning a function pointer.
830 void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
832 // Cast it to the right type (takes no arguments, returns a double) so we
833 // can call it as a native function.
// NOTE(review): FPtr is not checked for null on the visible lines before it
// is converted and called.
834 double (*FP)() = (double (*)())(intptr_t)FPtr;
835 fprintf(stderr, "Evaluated to %f\n", FP());
838 // Skip token for error recovery.
843 /// top ::= definition | external | expression | ';'
/// Interactive driver loop: prints the "ready> " prompt to stderr and
/// processes one top-level construct at a time.
// NOTE(review): the loop header and most case labels are not visible in this
// listing; only the ';' case and the expression handler call can be seen.
844 static void MainLoop() {
846 fprintf(stderr, "ready> ");
852 break; // ignore top-level semicolons.
860 HandleTopLevelExpression();
866 //===----------------------------------------------------------------------===//
867 // "Library" functions that can be "extern'd" from user code.
868 //===----------------------------------------------------------------------===//
870 /// putchard - putchar that takes a double and returns 0.
/// Declared extern "C" so that it is exported under the unmangled name
/// "putchard", matching what an `extern putchard(x)` declaration in user
/// code refers to.
871 extern "C" double putchard(double X) {
876 //===----------------------------------------------------------------------===//
878 //===----------------------------------------------------------------------===//
// Program start-up: initialises the native target, installs the operator
// table, creates the module and the JIT engine, assembles the per-function
// optimisation pipeline and then enters the interpreter loop.
// NOTE(review): several statements of this function, including its header,
// are not visible in this listing.
881 InitializeNativeTarget();
882 InitializeNativeTargetAsmPrinter();
883 InitializeNativeTargetAsmParser();
884 LLVMContext &Context = getGlobalContext();
886 // Install standard binary operators.
887 // 1 is lowest precedence.
// Larger numbers bind tighter: '<' (10), then '+' and '-' (20), then '*' (40).
888 BinopPrecedence['<'] = 10;
889 BinopPrecedence['+'] = 20;
890 BinopPrecedence['-'] = 20;
891 BinopPrecedence['*'] = 40; // highest.
893 // Prime the first token.
894 fprintf(stderr, "ready> ");
897 // Make the module, which holds all the code.
898 std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
// Keep a non-owning pointer: Owner is moved into the engine builder below,
// after which the module can only be reached through TheModule.
899 TheModule = Owner.get();
901 // Create the JIT. This takes ownership of the module.
904 EngineBuilder(std::move(Owner))
905 .setErrorStr(&ErrStr)
906 .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>())
// Engine creation failed: report the builder's error string.
908 if (!TheExecutionEngine) {
909 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
913 legacy::FunctionPassManager OurFPM(TheModule);
915 // Set up the optimizer pipeline. Start with registering info about how the
916 // target lays out data structures.
917 TheModule->setDataLayout(TheExecutionEngine->getDataLayout());
918 // Provide basic AliasAnalysis support for GVN.
919 OurFPM.add(createBasicAliasAnalysisPass());
920 // Do simple "peephole" optimizations and bit-twiddling optzns.
921 OurFPM.add(createInstructionCombiningPass());
922 // Reassociate expressions.
923 OurFPM.add(createReassociatePass());
924 // Eliminate Common SubExpressions.
925 OurFPM.add(createGVNPass());
926 // Simplify the control flow graph (deleting unreachable blocks, etc).
927 OurFPM.add(createCFGSimplificationPass());
929 OurFPM.doInitialization();
931 // Set the global so the code gen can use this.
934 // Run the main "interpreter loop" now.
939 // Print out all of the generated code.