From 7146e900e89b057595bbce8296cef1b55cf310a4 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 16 Sep 2015 16:51:30 +0000 Subject: [PATCH] [WebAssembly] Check in an initial CFG Stackifier pass This pass implements a simple algorithm for conversion from CFG to wasm's structured control flow. It doesn't yet handle multiple-entry loops; that will be added in a future patch. It also adds initial support for switch statements. Differential Revision: http://reviews.llvm.org/D12735 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247818 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/CMakeLists.txt | 1 + lib/Target/WebAssembly/WebAssembly.h | 2 + .../WebAssembly/WebAssemblyAsmPrinter.cpp | 8 + .../WebAssembly/WebAssemblyCFGStackify.cpp | 278 ++++++++++++++++++ lib/Target/WebAssembly/WebAssemblyISD.def | 2 + .../WebAssembly/WebAssemblyISelLowering.cpp | 51 ++++ .../WebAssembly/WebAssemblyISelLowering.h | 2 + .../WebAssembly/WebAssemblyInstrControl.td | 23 ++ .../WebAssembly/WebAssemblyInstrInfo.cpp | 86 ++++++ lib/Target/WebAssembly/WebAssemblyInstrInfo.h | 12 + .../WebAssembly/WebAssemblyInstrInfo.td | 10 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 8 +- test/CodeGen/WebAssembly/cfg-stackify.ll | 274 +++++++++++++++++ test/CodeGen/WebAssembly/phi.ll | 5 +- test/CodeGen/WebAssembly/switch.ll | 173 +++++++++++ 15 files changed, 929 insertions(+), 6 deletions(-) create mode 100644 lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp create mode 100644 test/CodeGen/WebAssembly/cfg-stackify.ll create mode 100644 test/CodeGen/WebAssembly/switch.ll diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index 61959cf89f4..44e536a28bd 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -12,6 +12,7 @@ add_public_tablegen_target(WebAssemblyCommonTableGen) add_llvm_target(WebAssemblyCodeGen Relooper.cpp WebAssemblyAsmPrinter.cpp + WebAssemblyCFGStackify.cpp WebAssemblyFastISel.cpp WebAssemblyFrameLowering.cpp WebAssemblyISelDAGToDAG.cpp diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h index 3ff19d46f43..b0cf6dd10d3 100644 --- a/lib/Target/WebAssembly/WebAssembly.h +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -26,6 +26,8 @@ class FunctionPass; FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); +FunctionPass *createWebAssemblyCFGStackify(); + } // end namespace llvm #endif diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index fca3890de60..94ea1085ff4 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -73,6 +73,7 @@ private: void EmitGlobalVariable(const GlobalVariable *GV) override; + void EmitJumpTableInfo() override; void EmitConstantPool() override; void EmitFunctionEntryLabel() override; void EmitFunctionBodyStart() override; @@ -213,6 +214,10 @@ void WebAssemblyAsmPrinter::EmitConstantPool() { "WebAssembly disables constant pools"); } +void WebAssemblyAsmPrinter::EmitJumpTableInfo() { + // Nothing to do; jump tables are incorporated into the instruction stream. +} + void WebAssemblyAsmPrinter::EmitFunctionEntryLabel() { SmallString<128> Str; raw_svector_ostream OS(Str); @@ -293,6 +298,9 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { case MachineOperand::MO_GlobalAddress: { OS << ' ' << toSymbol(MO.getGlobal()->getName()); } break; + case MachineOperand::MO_MachineBasicBlock: { + OS << ' ' << toSymbol(MO.getMBB()->getSymbol()->getName()); + } break; } OS << ')'; } diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp new file mode 100644 index 00000000000..27698969967 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -0,0 +1,278 @@ +//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a CFG stacking pass. +/// +/// This pass reorders the blocks in a function to put them into a reverse +/// post-order [0], with special care to keep the order as similar as possible +/// to the original order, and to keep loops contiguous even in the case of +/// split backedges. +/// +/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since +/// scope boundaries serve as the labels for WebAssembly's control transfers. +/// +/// This is sufficient to convert arbitrary CFGs into a form that works on +/// WebAssembly, provided that all loops are single-entry. +/// +/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-cfg-stackify" + +namespace { +class WebAssemblyCFGStackify final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly CFG Stackify"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyCFGStackify::ID = 0; +FunctionPass *llvm::createWebAssemblyCFGStackify() { + return new WebAssemblyCFGStackify(); +} + +static void EliminateMultipleEntryLoops(MachineFunction &MF, + const MachineLoopInfo &MLI) { + SmallPtrSet InSet; + for (scc_iterator I = scc_begin(&MF), E = scc_end(&MF); + I != E; ++I) { + const std::vector &CurrentSCC = *I; + + // Skip trivial SCCs. + if (CurrentSCC.size() == 1) + continue; + + InSet.insert(CurrentSCC.begin(), CurrentSCC.end()); + MachineBasicBlock *Header = nullptr; + for (MachineBasicBlock *MBB : CurrentSCC) { + for (MachineBasicBlock *Pred : MBB->predecessors()) { + if (InSet.count(Pred)) + continue; + if (!Header) { + Header = MBB; + break; + } + // TODO: Implement multiple-entry loops. + report_fatal_error("multiple-entry loops are not supported yet"); + } + } + assert(MLI.isLoopHeader(Header)); + + InSet.clear(); + } +} + +namespace { +/// Post-order traversal stack entry. +struct POStackEntry { + MachineBasicBlock *MBB; + SmallVector Succs; + + POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI); +}; +} // end anonymous namespace + +POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI) + : MBB(MBB), Succs(MBB->successors()) { + // RPO is not a unique form, since at every basic block with multiple + // successors, the DFS has to pick which order to visit the successors in. + // Sort them strategically (see below). + MachineLoop *Loop = MLI.getLoopFor(MBB); + MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; + std::stable_sort( + Succs.begin(), Succs.end(), + [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { + if (A == B) + return false; + + // Keep loops contiguous by preferring the block that's in the same + // loop. + MachineLoop *LoopA = MLI.getLoopFor(A); + MachineLoop *LoopB = MLI.getLoopFor(B); + if (LoopA == Loop && LoopB != Loop) + return true; + if (LoopA != Loop && LoopB == Loop) + return false; + + // Minimize perturbation by preferring the block which is the immediate + // layout successor. + if (A == LayoutSucc) + return true; + if (B == LayoutSucc) + return false; + + // TODO: More sophisticated orderings may be profitable here. + + return false; + }); +} + +/// Sort the blocks in RPO, taking special care to make sure that loops are +/// contiguous even in the case of split backedges. +static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { + // Note that we do our own RPO rather than using + // "llvm/ADT/PostOrderIterator.h" because we want control over the order that + // successors are visited in (see above). Also, we can sort the blocks in the + // MachineFunction as we go. + SmallPtrSet Visited; + SmallVector Stack; + + MachineBasicBlock *Entry = MF.begin(); + Visited.insert(Entry); + Stack.push_back(POStackEntry(Entry, MF, MLI)); + + for (;;) { + POStackEntry &Entry = Stack.back(); + SmallVectorImpl &Succs = Entry.Succs; + if (!Succs.empty()) { + MachineBasicBlock *Succ = Succs.pop_back_val(); + if (Visited.insert(Succ).second) + Stack.push_back(POStackEntry(Succ, MF, MLI)); + continue; + } + + // Put the block in its position in the MachineFunction. + MachineBasicBlock &MBB = *Entry.MBB; + MBB.moveBefore(MF.begin()); + + // Branch instructions may utilize a fallthrough, so update them if a + // fallthrough has been added or removed. + if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && + !MBB.back().isBarrier()) + report_fatal_error( + "Non-branch terminator with fallthrough cannot yet be rewritten"); + if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) + MBB.updateTerminator(); + + Stack.pop_back(); + if (Stack.empty()) + break; + } + + // Now that we've sorted the blocks in RPO, renumber them. + MF.RenumberBlocks(); + +#ifndef NDEBUG + for (auto &MBB : MF) + if (MachineLoop *Loop = MLI.getLoopFor(&MBB)) { + // Assert that loops are contiguous. + assert(Loop->getHeader() == Loop->getTopBlock()); + assert(Loop->getHeader() == &MBB || + MLI.getLoopFor(prev(MachineFunction::iterator(&MBB))) == Loop); + } else { + // Assert that non-loops have no backedge predecessors. + for (auto Pred : MBB.predecessors()) + assert(Pred->getNumber() < MBB.getNumber() && + "CFG still has multiple-entry loops"); + } +#endif +} + +/// Insert BLOCK markers at appropriate places. +static void PlaceBlockMarkers(MachineBasicBlock &MBB, MachineBasicBlock &Succ, + MachineFunction &MF, const MachineLoopInfo &MLI, + const WebAssemblyInstrInfo &TII) { + // Backward branches are loop backedges, and we place the LOOP markers + // separately. So only consider forward branches here. + if (Succ.getNumber() <= MBB.getNumber()) + return; + + // Place the BLOCK for a forward branch. For simplicity, we just insert + // blocks immediately inside loop boundaries. + MachineLoop *Loop = MLI.getLoopFor(&Succ); + MachineBasicBlock &Header = *(Loop ? Loop->getHeader() : &MF.front()); + MachineBasicBlock::iterator InsertPos = Header.begin(), End = Header.end(); + if (InsertPos != End) { + if (InsertPos->getOpcode() == WebAssembly::LOOP) + ++InsertPos; + int SuccNumber = Succ.getNumber(); + // Position the BLOCK in nesting order. + for (; InsertPos != End && InsertPos->getOpcode() == WebAssembly::BLOCK; + ++InsertPos) { + int N = InsertPos->getOperand(0).getMBB()->getNumber(); + if (N < SuccNumber) + break; + // If there's already a BLOCK for Succ, we don't need another. + if (N == SuccNumber) + return; + } + } + + BuildMI(Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) + .addMBB(&Succ); +} + +/// Insert LOOP and BLOCK markers at appropriate places. +static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, + const WebAssemblyInstrInfo &TII) { + for (auto &MBB : MF) { + // Place the LOOP for loops. + if (MachineLoop *Loop = MLI.getLoopFor(&MBB)) + if (Loop->getHeader() == &MBB) + BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP)) + .addMBB(Loop->getBottomBlock()); + + // Check for forward branches and switches that need BLOCKS placed. + for (auto &Term : MBB.terminators()) + for (auto &MO : Term.operands()) + if (MO.isMBB()) + PlaceBlockMarkers(MBB, *MO.getMBB(), MF, MLI, TII); + } +} + +bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** CFG Stackifying **********\n" + "********** Function: " + << MF.getName() << '\n'); + + const auto &MLI = getAnalysis(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + + // RPO sorting needs all loops to be single-entry. + EliminateMultipleEntryLoops(MF, MLI); + + // Sort the blocks in RPO, with contiguous loops. + SortBlocks(MF, MLI); + + // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. + PlaceMarkers(MF, MLI, TII); + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def index 24e613e7711..403c6154dcd 100644 --- a/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/lib/Target/WebAssembly/WebAssemblyISD.def @@ -19,5 +19,7 @@ HANDLE_NODETYPE(CALL0) HANDLE_NODETYPE(RETURN) HANDLE_NODETYPE(ARGUMENT) HANDLE_NODETYPE(Wrapper) +HANDLE_NODETYPE(BRIF) +HANDLE_NODETYPE(SWITCH) // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 12dce113c3e..5d2a79c797e 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -20,6 +20,7 @@ #include "WebAssemblyTargetObjectFile.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DiagnosticInfo.h" @@ -114,6 +115,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( computeRegisterProperties(Subtarget->getRegisterInfo()); setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); + setOperationAction(ISD::JumpTable, MVTPtr, Custom); for (auto T : {MVT::f32, MVT::f64}) { // Don't expand the floating-point types to constant pools. @@ -153,6 +155,14 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); + // Expand these forms; we pattern-match the forms that we can handle in isel. + for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + for (auto Op : {ISD::BR_CC, ISD::SELECT_CC}) + setOperationAction(Op, T, Expand); + + // We have custom switch handling. + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + // WebAssembly doesn't have: // - Floating-point extending loads. // - Floating-point truncating stores. @@ -365,6 +375,10 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, return SDValue(); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::BR_JT: + return LowerBR_JT(Op, DAG); } } @@ -382,6 +396,43 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT)); } +SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { + // There's no need for a Wrapper node because we always incorporate a jump + // table operand into a SWITCH instruction, rather than ever materializing + // it in a register. + const JumpTableSDNode *JT = cast(Op); + return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), + JT->getTargetFlags()); +} + +SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Chain = Op.getOperand(0); + const auto *JT = cast(Op.getOperand(1)); + SDValue Index = Op.getOperand(2); + assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(Index); + + MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); + const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; + + // TODO: For now, we just pick something arbitrary for a default case for now. + // We really want to sniff out the guard and put in the real default case (and + // delete the guard). + Ops.push_back(DAG.getBasicBlock(MBBs[0])); + + // Add an operand for each case. + for (auto MBB : MBBs) + Ops.push_back(DAG.getBasicBlock(MBB)); + + return DAG.getNode(WebAssemblyISD::SWITCH, DL, MVT::Other, Ops); +} + //===----------------------------------------------------------------------===// // WebAssembly Optimization Hooks //===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 119d0f5ef43..23e0c597a33 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -69,6 +69,8 @@ private: // Custom lowering hooks. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; }; namespace WebAssembly { diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 5f53e4a00d4..a349da6b971 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -25,6 +25,29 @@ * switch: switch statement with fallthrough */ +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { +def BRIF : I<(outs), (ins bb_op:$dst, Int32:$a), + [(brcond Int32:$a, bb:$dst)]>; +let isBarrier = 1 in { +def BR : I<(outs), (ins bb_op:$dst), + [(br bb:$dst)]>; +} // isBarrier = 1 +} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 + +// TODO: SelectionDAG's lowering insists on using a pointer as the index for +// jump tables, so in practice we don't ever use SWITCH_I64 in wasm32 mode +// currently. +let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { +def SWITCH_I32 : I<(outs), (ins Int32:$index, variable_ops), + [(WebAssemblyswitch Int32:$index)]>; +def SWITCH_I64 : I<(outs), (ins Int64:$index, variable_ops), + [(WebAssemblyswitch Int64:$index)]>; +} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 + +// Placemarkers to indicate the start of a block or loop scope. +def BLOCK : I<(outs), (ins bb_op:$dst), []>; +def LOOP : I<(outs), (ins bb_op:$dst), []>; + multiclass RETURN { def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)]>; } diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 1898ad8f8eb..19a65dc2a2f 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -37,3 +37,89 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(WebAssembly::COPY), DestReg) .addReg(SrcReg, KillSrc ? RegState::Kill : 0); } + +// Branch analysis. +bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + bool HaveCond = false; + for (MachineInstr &MI : iterator_range( + MBB.getFirstInstrTerminator(), MBB.instr_end())) { + switch (MI.getOpcode()) { + default: + // Unhandled instruction; bail out. + return true; + case WebAssembly::BRIF: + if (HaveCond) + return true; + Cond.push_back(MI.getOperand(1)); + TBB = MI.getOperand(0).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR: + if (!HaveCond) + TBB = MI.getOperand(0).getMBB(); + else + FBB = MI.getOperand(0).getMBB(); + break; + } + if (MI.isBarrier()) + break; + } + + return false; +} + +unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + unsigned Count = 0; + + while (I != MBB.instr_begin()) { + --I; + if (I->isDebugValue()) + continue; + if (!I->isTerminator()) + break; + // Remove the branch. + I->eraseFromParent(); + I = MBB.instr_end(); + ++Count; + } + + return Count; +} + +unsigned WebAssemblyInstrInfo::InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef Cond, DebugLoc DL) const { + assert(Cond.size() <= 1); + + if (Cond.empty()) { + if (!TBB) + return 0; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(TBB); + return 1; + } + + BuildMI(&MBB, DL, get(WebAssembly::BRIF)) + .addMBB(TBB) + .addOperand(Cond[0]); + if (!FBB) + return 1; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(FBB); + return 2; +} + +bool WebAssemblyInstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + assert(Cond.size() == 1); + + // TODO: Add branch reversal here... And re-enable MachineBlockPlacementID + // when we do. + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index 29feee2d831..b36a128b510 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -37,6 +37,18 @@ public: void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + DebugLoc DL) const override; + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 409c8525f93..63ff03fe6c0 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -30,6 +30,7 @@ def SDT_WebAssemblyCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; +def SDT_WebAssemblySwitch : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, @@ -51,6 +52,9 @@ def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0", def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1", SDT_WebAssemblyCall1, [SDNPHasChain, SDNPVariadic]>; +def WebAssemblyswitch : SDNode<"WebAssemblyISD::SWITCH", + SDT_WebAssemblySwitch, + [SDNPHasChain, SDNPVariadic]>; def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT", SDT_WebAssemblyArgument>; def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN", @@ -69,6 +73,8 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", * set_local: set the current value of a local variable */ +def bb_op : Operand; +def tjumptable_op : Operand; def global : Operand; //===----------------------------------------------------------------------===// @@ -96,9 +102,11 @@ def Immediate_F32 : I<(outs Float32:$res), (ins f32imm:$imm), def Immediate_F64 : I<(outs Float64:$res), (ins f64imm:$imm), [(set Float64:$res, fpimm:$imm)]>; -// Special types of immediates. +// Special types of immediates. FIXME: Hard-coded as 32-bit for now. def GLOBAL : I<(outs Int32:$dst), (ins global:$addr), [(set Int32:$dst, (WebAssemblywrapper tglobaladdr:$addr))]>; +def JUMP_TABLE : I<(outs Int32:$dst), (ins tjumptable_op:$addr), + [(set Int32:$dst, (WebAssemblywrapper tjumptable:$addr))]>; //===----------------------------------------------------------------------===// // Additional sets of instructions. diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 60bb8eb178b..95cf00819f4 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -159,8 +159,14 @@ void WebAssemblyPassConfig::addPostRegAlloc() { disablePass(&PrologEpilogCodeInserterID); // Fails with: should be run after register allocation. disablePass(&MachineCopyPropagationID); + + // TODO: Until we get ReverseBranchCondition support, MachineBlockPlacement + // can create ugly-looking control flow. + disablePass(&MachineBlockPlacementID); } void WebAssemblyPassConfig::addPreSched2() {} -void WebAssemblyPassConfig::addPreEmitPass() {} +void WebAssemblyPassConfig::addPreEmitPass() { + addPass(createWebAssemblyCFGStackify()); +} diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll new file mode 100644 index 00000000000..205384e6102 --- /dev/null +++ b/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -0,0 +1,274 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test the CFG stackifier pass. + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare void @something() + +; Test that loops are made contiguous, even in the presence of split backedges. + +; CHECK-LABEL: test0 +; CHECK: (loop +; CHECK: (add +; CHECK: (brif +; CHECK: (call +; CHECK: (br $BB0_1) +; CHECK: (return) +define void @test0(i32 %n) { +entry: + br label %header + +header: + %i = phi i32 [ 0, %entry ], [ %i.next, %back ] + %i.next = add i32 %i, 1 + + %c = icmp slt i32 %i.next, %n + br i1 %c, label %back, label %exit + +exit: + ret void + +back: + call void @something() + br label %header +} + +; Same as test0, but the branch condition is reversed. + +; CHECK-LABEL: test1 +; CHECK: (loop +; CHECK: (add +; CHECK: (brif +; CHECK: (call +; CHECK: (br $BB1_1) +; CHECK: (return) +define void @test1(i32 %n) { +entry: + br label %header + +header: + %i = phi i32 [ 0, %entry ], [ %i.next, %back ] + %i.next = add i32 %i, 1 + + %c = icmp sge i32 %i.next, %n + br i1 %c, label %exit, label %back + +exit: + ret void + +back: + call void @something() + br label %header +} + +; Test that a simple loop is handled as expected. + +; CHECK-LABEL: test2 +; CHECK: (block $BB2_2) +; CHECK: (brif $BB2_2 {{.*}}) +; CHECK: BB2_1: +; CHECK: (brif $BB2_1 @14) +; CHECK: BB2_2: +; CHECK: (return) +define void @test2(double* nocapture %p, i32 %n) { +entry: + %cmp.4 = icmp sgt i32 %n, 0 + br i1 %cmp.4, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds double, double* %p, i32 %i.05 + %0 = load double, double* %arrayidx, align 8 + %mul = fmul double %0, 3.200000e+00 + store double %mul, double* %arrayidx, align 8 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: doublediamond +; CHECK: (block $BB3_5) +; CHECK: (block $BB3_4) +; CHECK: (block $BB3_2) +; CHECK: (brif $BB3_2 @4) +; CHECK: (br $BB3_5) +; CHECK: BB3_2: +; CHECK: (brif $BB3_4 @7) +; CHECK: (br $BB3_5) +; CHECK: BB3_4: +; CHECK: BB3_5: +; CHECK: (return @3) +define i32 @doublediamond(i32 %a, i32 %b, i32* %p) { +entry: + %c = icmp eq i32 %a, 0 + %d = icmp eq i32 %b, 0 + store volatile i32 0, i32* %p + br i1 %c, label %true, label %false +true: + store volatile i32 1, i32* %p + br label %exit +false: + store volatile i32 2, i32* %p + br i1 %d, label %ft, label %ff +ft: + store volatile i32 3, i32* %p + br label %exit +ff: + store volatile i32 4, i32* %p + br label %exit +exit: + store volatile i32 5, i32* %p + ret i32 0 +} + +; CHECK-LABEL: triangle +; CHECK: (block $BB4_2) +; CHECK: (brif $BB4_2 @3) +; CHECK: BB4_2: +; CHECK: (return @2) +define i32 @triangle(i32* %p, i32 %a) { +entry: + %c = icmp eq i32 %a, 0 + store volatile i32 0, i32* %p + br i1 %c, label %true, label %exit +true: + store volatile i32 1, i32* %p + br label %exit +exit: + store volatile i32 2, i32* %p + ret i32 0 +} + +; CHECK-LABEL: diamond +; CHECK: (block $BB5_3) +; CHECK: (block $BB5_2) +; CHECK: (brif $BB5_2 @3) +; CHECK: (br $BB5_3) +; CHECK: BB5_2: +; CHECK: BB5_3: +; CHECK: (return @2) +define i32 @diamond(i32* %p, i32 %a) { +entry: + %c = icmp eq i32 %a, 0 + store volatile i32 0, i32* %p + br i1 %c, label %true, label %false +true: + store volatile i32 1, i32* %p + br label %exit +false: + store volatile i32 2, i32* %p + br label %exit +exit: + store volatile i32 3, i32* %p + ret i32 0 +} + +; CHECK-LABEL: single_block +; CHECK-NOT: br +; CHECK: (return @1) +define i32 @single_block(i32* %p) { +entry: + store volatile i32 0, i32* %p + ret i32 0 +} + +; CHECK-LABEL: minimal_loop +; CHECK-NOT: br +; CHECK: BB7_1: +; CHECK: (store_i32 @0 @2) +; CHECK: (br $BB7_1) +define i32 @minimal_loop(i32* %p) { +entry: + store volatile i32 0, i32* %p + br label %loop +loop: + store volatile i32 1, i32* %p + br label %loop +} + +; CHECK-LABEL: simple_loop +; CHECK-NOT: br +; CHECK: BB8_1: +; CHECK: (loop $BB8_1) +; CHECK: (brif $BB8_1 @4) +; CHECK: (return @2) +define i32 @simple_loop(i32* %p, i32 %a) { +entry: + %c = icmp eq i32 %a, 0 + store volatile i32 0, i32* %p + br label %loop +loop: + store volatile i32 1, i32* %p + br i1 %c, label %loop, label %exit +exit: + store volatile i32 2, i32* %p + ret i32 0 +} + +; CHECK-LABEL: doubletriangle +; CHECK: (block $BB9_4) +; CHECK: (block $BB9_3) +; CHECK: (brif $BB9_4 @4) +; CHECK: (brif $BB9_3 @7) +; CHECK: BB9_3: +; CHECK: BB9_4: +; CHECK: (return @3) +define i32 @doubletriangle(i32 %a, i32 %b, i32* %p) { +entry: + %c = icmp eq i32 %a, 0 + %d = icmp eq i32 %b, 0 + store volatile i32 0, i32* %p + br i1 %c, label %true, label %exit +true: + store volatile i32 2, i32* %p + br i1 %d, label %tt, label %tf +tt: + store volatile i32 3, i32* %p + br label %tf +tf: + store volatile i32 4, i32* %p + br label %exit +exit: + store volatile i32 5, i32* %p + ret i32 0 +} + +; CHECK-LABEL: ifelse_earlyexits +; CHECK: (block $BB10_4) +; CHECK: (block $BB10_2) +; CHECK: (brif $BB10_2 @4) +; CHECK: (br $BB10_4) +; CHECK: BB10_2: +; CHECK: (brif $BB10_4 @7) +; CHECK: BB10_4: +; CHECK: (return @3) +define i32 @ifelse_earlyexits(i32 %a, i32 %b, i32* %p) { +entry: + %c = icmp eq i32 %a, 0 + %d = icmp eq i32 %b, 0 + store volatile i32 0, i32* %p + br i1 %c, label %true, label %false +true: + store volatile i32 1, i32* %p + br label %exit +false: + store volatile i32 2, i32* %p + br i1 %d, label %ft, label %exit +ft: + store volatile i32 3, i32* %p + br label %exit +exit: + store volatile i32 4, i32* %p + ret i32 0 +} diff --git a/test/CodeGen/WebAssembly/phi.ll b/test/CodeGen/WebAssembly/phi.ll index f06d9673de5..a4675fba711 100644 --- a/test/CodeGen/WebAssembly/phi.ll +++ b/test/CodeGen/WebAssembly/phi.ll @@ -1,8 +1,5 @@ ; RUN: llc < %s -asm-verbose=false | FileCheck %s -; This test depends on branching support, which is not yet checked in. -; XFAIL: * - ; Test that phis are lowered. target datalayout = "e-p:32:32-i64:64-n32:64-S128" @@ -29,7 +26,7 @@ done: ; Swap phis. ; CHECK-LABEL: test1 -; CHECK: BB0_1: +; CHECK: BB1_1: ; CHECK: (setlocal [[REG0:@.*]] [[REG1:@.*]]) ; CHECK: (setlocal [[REG1]] [[REG2:@.*]]) ; CHECK: (setlocal [[REG2]] [[REG0]]) diff --git a/test/CodeGen/WebAssembly/switch.ll b/test/CodeGen/WebAssembly/switch.ll new file mode 100644 index 00000000000..8aaa391cb52 --- /dev/null +++ b/test/CodeGen/WebAssembly/switch.ll @@ -0,0 +1,173 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test switch instructions. + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare void @foo0() +declare void @foo1() +declare void @foo2() +declare void @foo3() +declare void @foo4() +declare void @foo5() + +; CHECK-LABEL: bar32 +; CHECK: (block $BB0_8) +; CHECK: (block $BB0_7) +; CHECK: (block $BB0_6) +; CHECK: (block $BB0_5) +; CHECK: (block $BB0_4) +; CHECK: (block $BB0_3) +; CHECK: (block $BB0_2) +; CHECK: (switch {{.*}} $BB0_2 $BB0_2 $BB0_2 $BB0_2 $BB0_2 $BB0_2 $BB0_2 $BB0_2 $BB0_3 $BB0_3 $BB0_3 $BB0_3 $BB0_3 $BB0_3 $BB0_3 $BB0_3 $BB0_4 $BB0_4 $BB0_4 $BB0_4 $BB0_4 $BB0_4 $BB0_5 $BB0_6 $BB0_7) +; CHECk: BB0_2: +; CHECK: (setlocal {{.*}} (global $foo0)) +; CHECK: BB0_3: +; CHECK: (setlocal {{.*}} (global $foo1)) +; CHECK: BB0_4: +; CHECK: (setlocal {{.*}} (global $foo2)) +; CHECK: BB0_5: +; CHECK: (setlocal {{.*}} (global $foo3)) +; CHECK: BB0_6: +; CHECK: (setlocal {{.*}} (global $foo4)) +; CHECK: BB0_7: +; CHECK: (setlocal {{.*}} (global $foo5)) +; CHECK: BB0_8: +; CHECK: (return) +define void @bar32(i32 %n) { +entry: + switch i32 %n, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb + i32 2, label %sw.bb + i32 3, label %sw.bb + i32 4, label %sw.bb + i32 5, label %sw.bb + i32 6, label %sw.bb + i32 7, label %sw.bb.1 + i32 8, label %sw.bb.1 + i32 9, label %sw.bb.1 + i32 10, label %sw.bb.1 + i32 11, label %sw.bb.1 + i32 12, label %sw.bb.1 + i32 13, label %sw.bb.1 + i32 14, label %sw.bb.1 + i32 15, label %sw.bb.2 + i32 16, label %sw.bb.2 + i32 17, label %sw.bb.2 + i32 18, label %sw.bb.2 + i32 19, label %sw.bb.2 + i32 20, label %sw.bb.2 + i32 21, label %sw.bb.3 + i32 22, label %sw.bb.4 + i32 23, label %sw.bb.5 + ] + +sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry + tail call void @foo0() + br label %sw.epilog + +sw.bb.1: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry + tail call void @foo1() + br label %sw.epilog + +sw.bb.2: ; preds = %entry, %entry, %entry, %entry, %entry, %entry + tail call void @foo2() + br label %sw.epilog + +sw.bb.3: ; preds = %entry + tail call void @foo3() + br label %sw.epilog + +sw.bb.4: ; preds = %entry + tail call void @foo4() + br label %sw.epilog + +sw.bb.5: ; preds = %entry + tail call void @foo5() + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb.5, %sw.bb.4, %sw.bb.3, %sw.bb.2, %sw.bb.1, %sw.bb + ret void +} + +; CHECK-LABEL: bar64 +; CHECK: (block $BB1_8) +; CHECK: (block $BB1_7) +; CHECK: (block $BB1_6) +; CHECK: (block $BB1_5) +; CHECK: (block $BB1_4) +; CHECK: (block $BB1_3) +; CHECK: (block $BB1_2) +; CHECK: (switch {{.*}} $BB1_2 $BB1_2 $BB1_2 $BB1_2 $BB1_2 $BB1_2 $BB1_2 $BB1_2 $BB1_3 $BB1_3 $BB1_3 $BB1_3 $BB1_3 $BB1_3 $BB1_3 $BB1_3 $BB1_4 $BB1_4 $BB1_4 $BB1_4 $BB1_4 $BB1_4 $BB1_5 $BB1_6 $BB1_7) +; CHECk: BB1_2: +; CHECK: (setlocal {{.*}} (global $foo0)) +; CHECK: BB1_3: +; CHECK: (setlocal {{.*}} (global $foo1)) +; CHECK: BB1_4: +; CHECK: (setlocal {{.*}} (global $foo2)) +; CHECK: BB1_5: +; CHECK: (setlocal {{.*}} (global $foo3)) +; CHECK: BB1_6: +; CHECK: (setlocal {{.*}} (global $foo4)) +; CHECK: BB1_7: +; CHECK: (setlocal {{.*}} (global $foo5)) +; CHECK: BB1_8: +; CHECK: (return) +define void @bar64(i64 %n) { +entry: + switch i64 %n, label %sw.epilog [ + i64 0, label %sw.bb + i64 1, label %sw.bb + i64 2, label %sw.bb + i64 3, label %sw.bb + i64 4, label %sw.bb + i64 5, label %sw.bb + i64 6, label %sw.bb + i64 7, label %sw.bb.1 + i64 8, label %sw.bb.1 + i64 9, label %sw.bb.1 + i64 10, label %sw.bb.1 + i64 11, label %sw.bb.1 + i64 12, label %sw.bb.1 + i64 13, label %sw.bb.1 + i64 14, label %sw.bb.1 + i64 15, label %sw.bb.2 + i64 16, label %sw.bb.2 + i64 17, label %sw.bb.2 + i64 18, label %sw.bb.2 + i64 19, label %sw.bb.2 + i64 20, label %sw.bb.2 + i64 21, label %sw.bb.3 + i64 22, label %sw.bb.4 + i64 23, label %sw.bb.5 + ] + +sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry + tail call void @foo0() + br label %sw.epilog + +sw.bb.1: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry + tail call void @foo1() + br label %sw.epilog + +sw.bb.2: ; preds = %entry, %entry, %entry, %entry, %entry, %entry + tail call void @foo2() + br label %sw.epilog + +sw.bb.3: ; preds = %entry + tail call void @foo3() + br label %sw.epilog + +sw.bb.4: ; preds = %entry + tail call void @foo4() + br label %sw.epilog + +sw.bb.5: ; preds = %entry + tail call void @foo5() + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb.5, %sw.bb.4, %sw.bb.3, %sw.bb.2, %sw.bb.1, %sw.bb + ret void +} -- 2.34.1