From 005cc9c500af4fc7a3dbaa61439dd5bf8c0471cf Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 25 Dec 2015 00:31:02 +0000 Subject: [PATCH] [WebAssembly] Fix handling of COPY instructions in WebAssemblyRegStackify. Move RegStackify after coalescing and teach it to use LiveIntervals instead of depending on SSA form. This avoids a problem where a register in a COPY instruction is stackified and then subsequently coalesced with a register that is not stackified. This also puts it after the scheduler, which allows us to simplify the EXPR_STACK constraint, as we no longer have instructions being reordered after stackification and before coloring. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256402 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyCFGStackify.cpp | 7 +- .../WebAssembly/WebAssemblyInstrControl.td | 6 +- .../WebAssembly/WebAssemblyRegStackify.cpp | 100 +++++++++--------- .../WebAssembly/WebAssemblyTargetMachine.cpp | 9 +- test/CodeGen/WebAssembly/cfg-stackify.ll | 14 +-- test/CodeGen/WebAssembly/reg-stackify.ll | 42 +++++++- 6 files changed, 111 insertions(+), 67 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 6ac53662359..e9671ee07e6 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -320,11 +320,12 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, // the BLOCK needs to be above the LOOP. InsertPos = Header->begin(); } else { - // Otherwise, insert the BLOCK as late in Header as we can, but before any - // existing BLOCKs. + // Otherwise, insert the BLOCK as late in Header as we can, but before the + // beginning of the local expression tree and any nested BLOCKs. InsertPos = Header->getFirstTerminator(); while (InsertPos != Header->begin() && - prev(InsertPos)->getOpcode() == WebAssembly::BLOCK) + prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && + prev(InsertPos)->getOpcode() != WebAssembly::LOOP) --InsertPos; } diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 9a9468bb390..05efe890341 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -50,9 +50,13 @@ def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops), "tableswitch\t$index, $default">; } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 -// Placemarkers to indicate the start of a block or loop scope. +// Placemarkers to indicate the start of a block or loop scope. These +// use/clobber EXPR_STACK to prevent them from being moved into the middle of +// an expression tree. +let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in { def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">; def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">; +} // Uses = [EXPR_STACK], Defs = [EXPR_STACK] // No-op to indicate to the AsmPrinter that a loop ends here, so a // basic block label is needed even if it wouldn't otherwise appear so. diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 3a58826bd76..89ef5cdb2be 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -24,6 +24,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -42,8 +43,12 @@ class WebAssemblyRegStackify final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(LiveVariablesID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -61,59 +66,68 @@ FunctionPass *llvm::createWebAssemblyRegStackify() { } // Decorate the given instruction with implicit operands that enforce the -// expression stack ordering constraints needed for an instruction which is -// consumed by an instruction using the expression stack. -static void ImposeStackInputOrdering(MachineInstr *MI) { +// expression stack ordering constraints for an instruction which is on +// the expression stack. +static void ImposeStackOrdering(MachineInstr *MI) { // Write the opaque EXPR_STACK register. if (!MI->definesRegister(WebAssembly::EXPR_STACK)) MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, /*isDef=*/true, /*isImp=*/true)); -} - -// Decorate the given instruction with implicit operands that enforce the -// expression stack ordering constraints for an instruction which is on -// the expression stack. -static void ImposeStackOrdering(MachineInstr *MI, MachineRegisterInfo &MRI) { - ImposeStackInputOrdering(MI); // Also read the opaque EXPR_STACK register. if (!MI->readsRegister(WebAssembly::EXPR_STACK)) MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, /*isDef=*/false, /*isImp=*/true)); - - // Also, mark any inputs to this instruction as being consumed by an - // instruction on the expression stack. - // TODO: Find a lighter way to describe the appropriate constraints. - for (MachineOperand &MO : MI->uses()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - MachineInstr *Def = MRI.getVRegDef(Reg); - if (Def->getOpcode() == TargetOpcode::PHI) - continue; - ImposeStackInputOrdering(Def); - } } -// Test whether it's safe to move Def to just before Insert. Note that this -// doesn't account for physical register dependencies, because WebAssembly -// doesn't have any (other than special ones like EXPR_STACK). +// Test whether it's safe to move Def to just before Insert. // TODO: Compute memory dependencies in a way that doesn't require always // walking the block. // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, - AliasAnalysis &AA) { + AliasAnalysis &AA, LiveIntervals &LIS, + MachineRegisterInfo &MRI) { assert(Def->getParent() == Insert->getParent()); bool SawStore = false, SawSideEffects = false; MachineBasicBlock::const_iterator D(Def), I(Insert); + + // Check for register dependencies. + for (const MachineOperand &MO : Def->operands()) { + if (!MO.isReg() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + + // If the register is dead here and at Insert, ignore it. + if (MO.isDead() && Insert->definesRegister(Reg) && + !Insert->readsRegister(Reg)) + continue; + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // If the physical register is never modified, ignore it. + if (!MRI.isPhysRegModified(Reg)) + continue; + // Otherwise, it's a physical register with unknown liveness. + return false; + } + + // Ask LiveIntervals whether moving this virtual register use or def to + // Insert will change value numbers are seen. + const LiveInterval &LI = LIS.getInterval(Reg); + VNInfo *DefVNI = MO.isDef() ? + LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) : + LI.getVNInfoBefore(LIS.getInstructionIndex(Def)); + assert(DefVNI && "Instruction input missing value number"); + VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert)); + if (InsVNI && DefVNI != InsVNI) + return false; + } + + // Check for memory dependencies and side effects. for (--I; I != D; --I) SawSideEffects |= I->isSafeToMove(&AA, SawStore); - return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) && !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore)); } @@ -127,8 +141,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo(); AliasAnalysis &AA = getAnalysis().getAAResults(); - - assert(MRI.isSSA() && "RegStackify depends on SSA form"); + LiveIntervals &LIS = getAnalysis(); // Walk the instructions from the bottom up. Currently we don't look past // block boundaries, and the blocks aren't ordered so the block visitation @@ -154,19 +167,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; unsigned Reg = Op.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // An instruction with a physical register. Conservatively mark it as - // an expression stack input so that it isn't reordered with anything - // in an expression stack which might use it (physical registers - // aren't in SSA form so it's not trivial to determine this). - // TODO: Be less conservative. - ImposeStackInputOrdering(Insert); - continue; - } // Only consider registers with a single definition. // TODO: Eventually we may relax this, to stackify phi transfers. - MachineInstr *Def = MRI.getVRegDef(Reg); + MachineInstr *Def = MRI.getUniqueVRegDef(Reg); if (!Def) continue; @@ -198,26 +202,26 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; // For now, be conservative and don't look across block boundaries. - // TODO: Be more aggressive. + // TODO: Be more aggressive? if (Def->getParent() != &MBB) continue; // Don't move instructions that have side effects or memory dependencies // or other complications. - if (!IsSafeToMove(Def, Insert, AA)) + if (!IsSafeToMove(Def, Insert, AA, LIS, MRI)) continue; Changed = true; AnyStackified = true; // Move the def down and nest it in the current instruction. - MBB.insert(MachineBasicBlock::instr_iterator(Insert), - Def->removeFromParent()); + MBB.splice(Insert, &MBB, Def); + LIS.handleMove(Def); MFI.stackifyVReg(Reg); - ImposeStackOrdering(Def, MRI); + ImposeStackOrdering(Def); Insert = Def; } if (AnyStackified) - ImposeStackOrdering(&MI, MRI); + ImposeStackOrdering(&MI); } } diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index c774c120bb5..e31ea46de9f 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -165,12 +165,6 @@ void WebAssemblyPassConfig::addPreRegAlloc() { // Prepare store instructions for register stackifying. addPass(createWebAssemblyStoreResults()); - - // Mark registers as representing wasm's expression stack. - addPass(createWebAssemblyRegStackify()); - // The register coalescing pass has a bad interaction with COPY MIs which have - // EXPR_STACK as an extra operand - // disablePass(&RegisterCoalescerID); } void WebAssemblyPassConfig::addPostRegAlloc() { @@ -184,6 +178,9 @@ void WebAssemblyPassConfig::addPostRegAlloc() { // Fails with: should be run after register allocation. disablePass(&MachineCopyPropagationID); + // Mark registers as representing wasm's expression stack. + addPass(createWebAssemblyRegStackify()); + // Run the register coloring pass to reduce the total number of registers. addPass(createWebAssemblyRegColoring()); diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll index a10fb614909..71f3551347b 100644 --- a/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -464,7 +464,7 @@ if.end: ; CHECK: block BB13_8{{$}} ; CHECK-NEXT: block BB13_7{{$}} ; CHECK-NEXT: block BB13_4{{$}} -; CHECK-NEXT: br_if $pop{{[0-9]*}}, BB13_4{{$}} +; CHECK: br_if $pop{{[0-9]*}}, BB13_4{{$}} ; CHECK-NEXT: block BB13_3{{$}} ; CHECK: br_if $pop{{[0-9]*}}, BB13_3{{$}} ; CHECK: br_if $pop{{[0-9]*}}, BB13_7{{$}} @@ -483,7 +483,7 @@ if.end: ; OPT: block BB13_8{{$}} ; OPT-NEXT: block BB13_7{{$}} ; OPT-NEXT: block BB13_4{{$}} -; OPT-NEXT: br_if $pop{{[0-9]*}}, BB13_4{{$}} +; OPT: br_if $pop{{[0-9]*}}, BB13_4{{$}} ; OPT-NEXT: block BB13_3{{$}} ; OPT: br_if $pop{{[0-9]*}}, BB13_3{{$}} ; OPT: br_if $pop{{[0-9]*}}, BB13_7{{$}} @@ -642,7 +642,7 @@ second: ; CHECK-NEXT: loop BB16_5{{$}} ; CHECK-NOT: block ; CHECK: block BB16_4{{$}} -; CHECK-NEXT: br_if {{[^,]*}}, BB16_4{{$}} +; CHECK: br_if {{[^,]*}}, BB16_4{{$}} ; CHECK-NOT: block ; CHECK: br_if {{[^,]*}}, BB16_1{{$}} ; CHECK-NOT: block @@ -907,7 +907,7 @@ bb6: ; CHECK-NEXT: br_if {{[^,]*}}, BB20_4{{$}} ; CHECK-NOT: block ; CHECK: block BB20_3{{$}} -; CHECK-NEXT: br_if {{[^,]*}}, BB20_3{{$}} +; CHECK: br_if {{[^,]*}}, BB20_3{{$}} ; CHECK-NOT: block ; CHECK: br_if {{[^,]*}}, BB20_6{{$}} ; CHECK-NEXT: BB20_3: @@ -933,7 +933,7 @@ bb6: ; OPT-NEXT: br_if $0, BB20_4{{$}} ; OPT-NOT: block ; OPT: block BB20_3{{$}} -; OPT-NEXT: br_if $0, BB20_3{{$}} +; OPT: br_if $0, BB20_3{{$}} ; OPT-NOT: block ; OPT: br_if $0, BB20_8{{$}} ; OPT-NEXT: BB20_3: @@ -991,7 +991,7 @@ bb8: ; CHECK: block BB21_7{{$}} ; CHECK-NEXT: block BB21_6{{$}} ; CHECK-NEXT: block BB21_4{{$}} -; CHECK-NEXT: br_if {{[^,]*}}, BB21_4{{$}} +; CHECK: br_if {{[^,]*}}, BB21_4{{$}} ; CHECK-NOT: block ; CHECK: br_if {{[^,]*}}, BB21_7{{$}} ; CHECK-NOT: block @@ -1015,7 +1015,7 @@ bb8: ; OPT: block BB21_7{{$}} ; OPT-NEXT: block BB21_6{{$}} ; OPT-NEXT: block BB21_4{{$}} -; OPT-NEXT: br_if {{[^,]*}}, BB21_4{{$}} +; OPT: br_if {{[^,]*}}, BB21_4{{$}} ; OPT-NOT: block ; OPT: br_if {{[^,]*}}, BB21_7{{$}} ; OPT-NOT: block diff --git a/test/CodeGen/WebAssembly/reg-stackify.ll b/test/CodeGen/WebAssembly/reg-stackify.ll index 3c343434836..1c1b1e193f7 100644 --- a/test/CodeGen/WebAssembly/reg-stackify.ll +++ b/test/CodeGen/WebAssembly/reg-stackify.ll @@ -53,8 +53,9 @@ define i32 @yes1(i32* %q) { ; CHECK-NEXT: .param i32, i32, i32, i32{{$}} ; CHECK-NEXT: .result i32{{$}} ; CHECK-NEXT: .local i32, i32{{$}} -; CHECK-NEXT: i32.const $4=, 1{{$}} ; CHECK-NEXT: i32.const $5=, 2{{$}} +; CHECK-NEXT: i32.const $4=, 1{{$}} +; CHECK-NEXT: block BB4_2{{$}} ; CHECK-NEXT: i32.lt_s $push0=, $0, $4{{$}} ; CHECK-NEXT: i32.lt_s $push1=, $1, $5{{$}} ; CHECK-NEXT: i32.xor $push4=, $pop0, $pop1{{$}} @@ -63,7 +64,6 @@ define i32 @yes1(i32* %q) { ; CHECK-NEXT: i32.xor $push5=, $pop2, $pop3{{$}} ; CHECK-NEXT: i32.xor $push6=, $pop4, $pop5{{$}} ; CHECK-NEXT: i32.ne $push7=, $pop6, $4{{$}} -; CHECK-NEXT: block BB4_2{{$}} ; CHECK-NEXT: br_if $pop7, BB4_2{{$}} ; CHECK-NEXT: i32.const $push8=, 0{{$}} ; CHECK-NEXT: return $pop8{{$}} @@ -85,4 +85,42 @@ false: ret i32 1 } +; Test an interesting case where the load has multiple uses and cannot +; be trivially stackified. + +; CHECK-LABEL: multiple_uses: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK-NEXT: .local i32{{$}} +; CHECK-NEXT: i32.load $3=, 0($2){{$}} +; CHECK-NEXT: block BB5_3{{$}} +; CHECK-NEXT: i32.ge_u $push0=, $3, $1{{$}} +; CHECK-NEXT: br_if $pop0, BB5_3{{$}} +; CHECK-NEXT: i32.lt_u $push1=, $3, $0{{$}} +; CHECK-NEXT: br_if $pop1, BB5_3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($2), $3{{$}} +; CHECK-NEXT: BB5_3: +; CHECK-NEXT: return{{$}} +define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind { +bb: + br label %loop + +loop: + %tmp7 = load i32, i32* %arg2 + %tmp8 = inttoptr i32 %tmp7 to i32* + %tmp9 = icmp uge i32* %tmp8, %arg1 + %tmp10 = icmp ult i32* %tmp8, %arg0 + %tmp11 = or i1 %tmp9, %tmp10 + br i1 %tmp11, label %back, label %then + +then: + store i32 %tmp7, i32* %arg2 + br label %back + +back: + br i1 undef, label %return, label %loop + +return: + ret void +} + !0 = !{} -- 2.34.1