From: Devang Patel Date: Tue, 11 Oct 2011 18:09:58 +0000 (+0000) Subject: Add dominance check for the instruction being hoisted. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2e350479478ccf809e2142a4f0ad8062342577cc;p=oota-llvm.git Add dominance check for the instruction being hoisted. For example, MachineLICM should not hoist a load that is not guaranteed to be executed. Radar 10254254. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141689 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index d310f252e28..f6a08d36208 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -91,6 +91,11 @@ namespace { // For each opcode, keep a list of potential CSE instructions. DenseMap > CSEMap; + // If a MBB does not dominate loop exiting blocks then it may not safe + // to hoist loads from this block. + bool CurrentMBBDominatesLoopExitingBlocks; + bool NeedToCheckMBBDominance; + public: static char ID; // Pass identification, replacement for typeid MachineLICM() : @@ -194,6 +199,10 @@ namespace { /// hoist the given loop invariant. bool IsProfitableToHoist(MachineInstr &MI); + /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. + /// If not then a load from this mbb may not be safe to hoist. + bool IsGuaranteedToExecute(MachineBasicBlock *BB); + /// HoistRegion - Walk the specified region of the CFG (defined by all /// blocks dominated by the specified block, and that are in the current /// loop) in depth first order w.r.t the DominatorTree. This allows us to @@ -295,6 +304,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); InstrItins = TM->getInstrItineraryData(); AllocatableSet = TRI->getAllocatableSet(MF); + // Stay conservative. + CurrentMBBDominatesLoopExitingBlocks = false; + NeedToCheckMBBDominance = true; if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. @@ -459,6 +471,7 @@ void MachineLICM::HoistRegionPostRA() { ++PhysRegDefs[*AS]; } + NeedToCheckMBBDominance = true; for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ++MII) { MachineInstr *MI = &*MII; @@ -552,6 +565,30 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { Changed = true; } +// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. +// If not then a load from this mbb may not be safe to hoist. +bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { + // Do not check if we already have checked it once. + if (NeedToCheckMBBDominance == false) + return CurrentMBBDominatesLoopExitingBlocks; + + NeedToCheckMBBDominance = false; + + if (BB != CurLoop->getHeader()) { + // Check loop exiting blocks. + SmallVector CurrentLoopExitingBlocks; + CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); + for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) + if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { + CurrentMBBDominatesLoopExitingBlocks = false; + return CurrentMBBDominatesLoopExitingBlocks; + } + } + + CurrentMBBDominatesLoopExitingBlocks = true; + return CurrentMBBDominatesLoopExitingBlocks; +} + /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions @@ -578,6 +615,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { // Remember livein register pressure. BackTrace.push_back(RegPressure); + NeedToCheckMBBDominance = true; for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; @@ -711,7 +749,14 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { bool DontMoveAcrossStore = true; if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) return false; - + + // If it is load then check if it is guaranteed to execute by making sure that + // it dominates all exiting blocks. If it doesn't, then there is a path out of + // the loop which does not execute this load, so we can't hoist it. + // Stores and side effects are already checked by isSafeToMove. + if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent())) + return false; + return true; } diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index 61b25bb94af..bf26a9670a7 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -4,12 +4,11 @@ ; register pressure and therefore spilling. There is more room for improvement ; here. -; CHECK: sub sp, #{{32|28|24}} +; CHECK: sub sp, #{{40|32|28|24}} ; CHECK: %for.inc ; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # ; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # -; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # ; CHECK: add target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" diff --git a/test/CodeGen/X86/licm-dominance.ll b/test/CodeGen/X86/licm-dominance.ll new file mode 100644 index 00000000000..8a0958db0ef --- /dev/null +++ b/test/CodeGen/X86/licm-dominance.ll @@ -0,0 +1,36 @@ +; RUN: llc -asm-verbose=false < %s | FileCheck %s + +; MachineLICM should check dominance before hoisting instructions. +; CHECK: jne LBB0_3 +; CHECK-NEXT: xorb %al, %al +; CHECK-NEXT: testb %al, %al + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.2" + +define void @CMSColorWorldCreateParametricData() nounwind uwtable optsize ssp { +entry: + br label %for.body.i + +for.body.i: + br i1 undef, label %for.inc.i, label %if.then26.i + +if.then26.i: + br i1 undef, label %if.else.i.i, label %lor.lhs.false.i.i + +if.else.i.i: + br i1 undef, label %lor.lhs.false.i.i, label %if.then116.i.i + +lor.lhs.false.i.i: + br i1 undef, label %for.inc.i, label %if.then116.i.i + +if.then116.i.i: + unreachable + +for.inc.i: + %cmp17.i = icmp ult i64 undef, undef + br i1 %cmp17.i, label %for.body.i, label %if.end28.i + +if.end28.i: + ret void +} diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index b0105ac533b..901d987e8d4 100644 --- a/test/CodeGen/X86/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 3 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 2 ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 31f41eebc5a..e13a81719ea 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -102,6 +102,7 @@ entry: br label %bb60 bb: ; preds = %bb60 + %i.0 = phi i32 [ 0, %bb60 ] ; [#uses=2] %0 = bitcast float* %x_addr.0 to <4 x float>* ; <<4 x float>*> [#uses=1] %1 = load <4 x float>* %0, align 16 ; <<4 x float>> [#uses=4] %tmp20 = bitcast <4 x float> %1 to <4 x i32> ; <<4 x i32>> [#uses=1] @@ -129,15 +130,14 @@ bb: ; preds = %bb60 %5 = getelementptr float* %x_addr.0, i64 4 ; [#uses=1] %6 = getelementptr float* %y_addr.0, i64 4 ; [#uses=1] %7 = add i32 %i.0, 4 ; [#uses=1] - br label %bb60 + %8 = load i32* %n, align 4 ; [#uses=1] + %9 = icmp sgt i32 %8, %7 ; [#uses=1] + br i1 %9, label %bb60, label %return bb60: ; preds = %bb, %entry - %i.0 = phi i32 [ 0, %entry ], [ %7, %bb ] ; [#uses=2] %x_addr.0 = phi float* [ %x, %entry ], [ %5, %bb ] ; [#uses=2] %y_addr.0 = phi float* [ %y, %entry ], [ %6, %bb ] ; [#uses=2] - %8 = load i32* %n, align 4 ; [#uses=1] - %9 = icmp sgt i32 %8, %i.0 ; [#uses=1] - br i1 %9, label %bb, label %return + br label %bb return: ; preds = %bb60 ret void