From e94b2105e93b551b1b1bc9a009eb7c3c622ad5b7 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 3 Dec 2015 23:07:03 +0000 Subject: [PATCH] [WebAssembly] Fix dominance check for PHIs in the StoreResult pass When a block has no terminator instructions, getFirstTerminator() returns end(), which can't be used in dominance checks. Check dominance for phi operands separately. Also, remove some bits from WebAssemblyRegStackify.cpp that were causing trouble on the same testcase; they were left behind from an earlier experiment. Differential Revision: http://reviews.llvm.org/D15210 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254662 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyRegStackify.cpp | 9 ++-- .../WebAssembly/WebAssemblyStoreResults.cpp | 33 +++++++++----- test/CodeGen/WebAssembly/store-results.ll | 43 +++++++++++++++++++ 3 files changed, 69 insertions(+), 16 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index bdccc8577c5..ecbbc5c7224 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -81,6 +81,7 @@ static void ImposeStackOrdering(MachineInstr *MI) { // more precise. static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, AliasAnalysis &AA) { + assert(Def->getParent() == Insert->getParent()); bool SawStore = false, SawSideEffects = false; MachineBasicBlock::const_iterator D(Def), I(Insert); for (--I; I != D; --I) @@ -155,17 +156,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Def->getOpcode() == WebAssembly::ARGUMENT_F64) continue; - // Single-use expression trees require defs that have one use, or that - // they be trivially clonable. + // Single-use expression trees require defs that have one use. // TODO: Eventually we'll relax this, to take advantage of set_local // returning its result. if (!MRI.hasOneUse(Reg)) continue; - // For now, be conservative and don't look across block boundaries, - // unless we have something trivially clonable. + // For now, be conservative and don't look across block boundaries. // TODO: Be more aggressive. - if (Def->getParent() != &MBB && !Def->isMoveImmediate()) + if (Def->getParent() != &MBB) continue; // Don't move instructions that have side effects or memory dependencies diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 3a7f50e3b14..4a8fc09878c 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -10,9 +10,12 @@ /// \file /// \brief This file implements an optimization pass using store result values. /// -/// WebAssembly's store instructions return the stored value, specifically to -/// enable the optimization of reducing get_local/set_local traffic, which is -/// what we're doing here. +/// WebAssembly's store instructions return the stored value. This is to enable +/// an optimization wherein uses of the stored value can be replaced by uses of +/// the store's result value, making the stored value register more likely to +/// be single-use, thus more likely to be useful to register stackifying, and +/// potentially also exposing the store to register stackifying. These both can +/// reduce get_local/set_local traffic. /// //===----------------------------------------------------------------------===// @@ -89,14 +92,22 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); - if (Where->getOpcode() == TargetOpcode::PHI) - Where = Where->getOperand(&O - &Where->getOperand(0) + 1) - .getMBB() - ->getFirstTerminator(); - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; - DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << - " from " << MI <<"\n"); + if (Where->getOpcode() == TargetOpcode::PHI) { + // PHIs use their operands on their incoming CFG edges rather than + // in their parent blocks. Get the basic block paired with this use + // of FromReg and check that MI's block dominates it. + MachineBasicBlock *Pred = + Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); + if (!MDT.dominates(&MBB, Pred)) + continue; + } else { + // For a non-PHI, check that MI dominates the instruction in the + // normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + } + DEBUG(dbgs() << "Setting operand " << O << " in " << *Where + << " from " << MI << "\n"); O.setReg(ToReg); } } diff --git a/test/CodeGen/WebAssembly/store-results.ll b/test/CodeGen/WebAssembly/store-results.ll index 1bcee5d31fb..c05ed3a04be 100644 --- a/test/CodeGen/WebAssembly/store-results.ll +++ b/test/CodeGen/WebAssembly/store-results.ll @@ -16,3 +16,46 @@ entry: store i32 0, i32* %p ret i32 0 } + +; Test interesting corner cases for wasm-store-results, in which the operand of +; a store ends up getting used by a phi, which needs special handling in the +; dominance test, since phis use their operands on their incoming edges. + +%class.Vec3 = type { float, float, float } + +@pos = global %class.Vec3 zeroinitializer, align 4 + +; CHECK-LABEL: foo: +; CHECK: i32.store $discard=, $pop0, $0 +define void @foo() { +for.body.i: + br label %for.body5.i + +for.body5.i: + %i.0168.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body5.i ] + %conv6.i = sitofp i32 %i.0168.i to float + store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0) + %inc.i = add nuw nsw i32 %i.0168.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 256 + br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i + +for.cond.cleanup4.i: + ret void +} + +; CHECK-LABEL: bar: +; CHECK: i32.store $discard=, $0, $pop0 +define void @bar() { +for.body.i: + br label %for.body5.i + +for.body5.i: + %i.0168.i = phi float [ 0.0, %for.body.i ], [ %inc.i, %for.body5.i ] + store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0) + %inc.i = fadd float %i.0168.i, 1.0 + %exitcond.i = fcmp oeq float %inc.i, 256.0 + br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i + +for.cond.cleanup4.i: + ret void +} -- 2.34.1