From: Chris Lattner Date: Mon, 24 Jan 2011 01:07:11 +0000 (+0000) Subject: enhance SRoA to promote allocas that are used by PHI nodes. This often X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e3357863aaabe297c5fa819d083f2308ebb5f2c2;p=oota-llvm.git enhance SRoA to promote allocas that are used by PHI nodes. This often occurs because instcombine sinks loads and inserts phis. This kicks in on such apps as 175.vpr, eon, 403.gcc, xalancbmk and a bunch of times in spec2006 in some app that uses std::deque. This resolves the last of rdar://7339113. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124090 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 60302f77947..d4c953514ce 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -874,7 +874,7 @@ public: /// to: /// %V1 = load i32* %Alloca -> will be mem2reg'd /// %V2 = load i32* %Other -/// %V = select i1 %cond, i32 %V1, i32* %V2 +/// %V = select i1 %cond, i32 %V1, i32 %V2 /// /// We can do this to a select if its only uses are loads and if the operand to /// the select can be loaded unconditionally. @@ -887,7 +887,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { LoadInst *LI = dyn_cast(*UI); if (LI == 0 || LI->isVolatile()) return false; - // Both operands to the select need to be deferencable, either absolutely + // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI, LI->getAlignment(), TD)) @@ -900,6 +900,77 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { return true; } +/// isSafePHIToSpeculate - PHI instructions that use an alloca and are +/// subsequently loaded can be rewritten to load both input pointers in the pred +/// blocks and then PHI the results, allowing the load of the alloca to be +/// promoted. +/// From this: +/// %P2 = phi [i32* %Alloca, i32* %Other] +/// %V = load i32* %P2 +/// to: +/// %V1 = load i32* %Alloca -> will be mem2reg'd +/// ... +/// %V2 = load i32* %Other +/// ... +/// %V = phi [i32 %V1, i32 %V2] +/// +/// We can do this to a select if its only uses are loads and if the operand to +/// the select can be loaded unconditionally. +static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { + // For now, we can only do this promotion if the load is in the same block as + // the PHI, and if there are no stores between the phi and load. + // TODO: Allow recursive phi users. + // TODO: Allow stores. + BasicBlock *BB = PN->getParent(); + unsigned MaxAlign = 0; + for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast(*UI); + if (LI == 0 || LI->isVolatile()) return false; + + // For now we only allow loads in the same block as the PHI. This is a + // common case that happens when instcombine merges two loads through a PHI. + if (LI->getParent() != BB) return false; + + // Ensure that there are no instructions between the PHI and the load that + // could store. + for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI) + if (BBI->mayWriteToMemory()) + return false; + + MaxAlign = std::max(MaxAlign, LI->getAlignment()); + } + + // Okay, we know that we have one or more loads in the same block as the PHI. + // We can transform this if it is safe to push the loads into the predecessor + // blocks. The only thing to watch out for is that we can't put a possibly + // trapping load in the predecessor if it is a critical edge. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + + // If the predecessor has a single successor, then the edge isn't critical. + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + Value *InVal = PN->getIncomingValue(i); + + // If the InVal is an invoke in the pred, we can't put a load on the edge. + if (InvokeInst *II = dyn_cast(InVal)) + if (II->getParent() == Pred) + return false; + + // If this pointer is always safe to load, or if we can prove that there is + // already a load in the block, then we can move the load to the pred block. + if (InVal->isDereferenceablePointer() || + isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD)) + continue; + + return false; + } + + return true; +} + /// tryToMakeAllocaBePromotable - This returns true if the alloca only has /// direct (non-volatile) loads and stores to it. If the alloca is close but @@ -946,6 +1017,21 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { continue; } + if (PHINode *PN = dyn_cast(U)) { + if (PN->use_empty()) { // Dead PHIs can be stripped. + InstsToRewrite.insert(PN); + continue; + } + + // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads + // in the pred blocks, then we can transform this by rewriting the PHI. + if (!isSafePHIToSpeculate(PN, TD)) + return false; + + InstsToRewrite.insert(PN); + continue; + } + return false; } @@ -957,35 +1043,80 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { // If we have instructions that need to be rewritten for this to be promotable // take care of it now. for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { - // Selects in InstsToRewrite only have load uses. Rewrite each as two - // loads with a new select. - SelectInst *SI = cast(InstsToRewrite[i]); - - for (Value::use_iterator UI = SI->use_begin(), E = SI->use_end(); UI != E;){ - LoadInst *LI = cast(*UI++); + if (SelectInst *SI = dyn_cast(InstsToRewrite[i])) { + // Selects in InstsToRewrite only have load uses. Rewrite each as two + // loads with a new select. + while (!SI->use_empty()) { + LoadInst *LI = cast(SI->use_back()); - IRBuilder<> Builder(LI); - LoadInst *TrueLoad = - Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); - LoadInst *FalseLoad = - Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t"); - - // Transfer alignment and TBAA info if present. - TrueLoad->setAlignment(LI->getAlignment()); - FalseLoad->setAlignment(LI->getAlignment()); - if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { - TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); - FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + IRBuilder<> Builder(LI); + LoadInst *TrueLoad = + Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); + LoadInst *FalseLoad = + Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t"); + + // Transfer alignment and TBAA info if present. + TrueLoad->setAlignment(LI->getAlignment()); + FalseLoad->setAlignment(LI->getAlignment()); + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { + TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + } + + Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); + V->takeName(LI); + LI->replaceAllUsesWith(V); + LI->eraseFromParent(); } - - Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); - V->takeName(LI); - LI->replaceAllUsesWith(V); + + // Now that all the loads are gone, the select is gone too. + SI->eraseFromParent(); + continue; + } + + // Otherwise, we have a PHI node which allows us to push the loads into the + // predecessors. + PHINode *PN = cast(InstsToRewrite[i]); + if (PN->use_empty()) { + PN->eraseFromParent(); + continue; + } + + const Type *LoadTy = cast(PN->getType())->getElementType(); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + + // Get the TBAA tag and alignment to use from one of the loads. It doesn't + // matter which one we get and if any differ, it doesn't matter. + LoadInst *SomeLoad = cast(PN->use_back()); + MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); + unsigned Align = SomeLoad->getAlignment(); + + // Rewrite all loads of the PN to use the new PHI. + while (!PN->use_empty()) { + LoadInst *LI = cast(PN->use_back()); + LI->replaceAllUsesWith(NewPN); LI->eraseFromParent(); } - // Now that all the loads are gone, the select is gone too. - SI->eraseFromParent(); + // Inject loads into all of the pred blocks. Keep track of which blocks we + // insert them into in case we have multiple edges from the same block. + DenseMap InsertedLoads; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + LoadInst *&Load = InsertedLoads[Pred]; + if (Load == 0) { + Load = new LoadInst(PN->getIncomingValue(i), + PN->getName() + "." + Pred->getName(), + Pred->getTerminator()); + Load->setAlignment(Align); + if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); + } + + NewPN->addIncoming(Load, Pred); + } + + PN->eraseFromParent(); } ++NumAdjusted; diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll index f612ed55a2d..fa3972de90a 100644 --- a/test/Transforms/ScalarRepl/phi-select.ll +++ b/test/Transforms/ScalarRepl/phi-select.ll @@ -25,8 +25,8 @@ entry: } ; CHECK: @test2 -; CHECK: %A.0 = alloca i32 -; CHECK: %A.1 = alloca i32 +; CHECK: %X.ld = phi i32 [ 1, %entry ], [ 2, %T ] +; CHECK-NEXT: ret i32 %X.ld define i32 @test2(i1 %c) { entry: %A = alloca {i32, i32} @@ -35,7 +35,7 @@ entry: br i1 %c, label %T, label %F T: %C = getelementptr {i32, i32}* %A, i32 0, i32 1 - store i32 2, i32* %B + store i32 2, i32* %C br label %F F: %X = phi i32* [%B, %entry], [%C, %T] @@ -130,3 +130,24 @@ define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp { ; CHECK: ret i32 %r } +;; Promote allocs that are PHI'd together by moving the loads. +define i32 @test8(i32 %x) nounwind readnone ssp { +; CHECK: @test8 +; CHECK-NOT: load i32 +; CHECK-NOT: store i32 +; CHECK: %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ] +; CHECK-NEXT: ret i32 %p.0.ld +entry: + %a = alloca i32, align 8 + %b = alloca i32, align 8 + store i32 1, i32* %a, align 8 + store i32 2, i32* %b, align 8 + %c = icmp eq i32 %x, 0 + br i1 %c, label %T, label %Cont +T: + br label %Cont +Cont: + %p.0 = phi i32* [%b, %entry],[%a, %T] + %r = load i32* %p.0, align 8 + ret i32 %r +}