From: Nico Weber Date: Thu, 24 Dec 2015 04:01:06 +0000 (+0000) Subject: Revert r256336, it caused PR25939 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=55a37948ebdd934952b0fa9f6819107aaa7d4feb;p=oota-llvm.git Revert r256336, it caused PR25939 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256361 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 4fa011722fb..2d577de7c2b 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -108,11 +108,7 @@ public: private: typedef SmallVector<StoreInst *, 8> StoreList; - StoreList StoreRefsForMemset; - StoreList StoreRefsForMemcpy; - bool HasMemset; - bool HasMemsetPattern; - bool HasMemcpy; + StoreList StoreRefs; /// \name Countable Loop Idiom Handling /// @{ @@ -122,15 +118,17 @@ private: SmallVectorImpl<BasicBlock *> &ExitBlocks); void collectStores(BasicBlock *BB); - bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy); + bool isLegalStore(StoreInst *SI); bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, - unsigned StoreAlignment, Value *StoredVal, + unsigned StoreAlignment, Value *SplatValue, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride); - bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount); + bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, + const SCEVAddRecExpr *StoreEv, + const SCEV *BECount, bool NegStride); /// @} /// \name Noncountable Loop Idiom Handling @@ -209,13 +207,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { *CurLoop->getHeader()->getParent()); DL = &CurLoop->getHeader()->getModule()->getDataLayout(); - HasMemset = TLI->has(LibFunc::memset); - HasMemsetPattern = TLI->has(LibFunc::memset_pattern16); - HasMemcpy = 
TLI->has(LibFunc::memcpy); - - if (HasMemset || HasMemsetPattern || HasMemcpy) - if (SE->hasLoopInvariantBackedgeTakenCount(L)) - return runOnCountableLoop(); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) + return runOnCountableLoop(); return runOnNoncountableLoop(); } @@ -304,8 +297,7 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C)); } -bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, - bool &ForMemcpy) { +bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // Don't touch volatile stores. if (!SI->isSimple()) return false; @@ -330,86 +322,22 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, if (!isa<SCEVConstant>(StoreEv->getOperand(1))) return false; - // See if the store can be turned into a memset. - - // If the stored value is a byte-wise value (like i32 -1), then it may be - // turned into a memset of i8 -1, assuming that all the consecutive bytes - // are stored. A store of i32 0x01020304 can never be turned into a memset, - // but it can be turned into memset_pattern if the target supports it. - Value *SplatValue = isBytewiseValue(StoredVal); - Constant *PatternValue = nullptr; - - // If we're allowed to form a memset, and the stored value would be - // acceptable for memset, use it. - if (HasMemset && SplatValue && - // Verify that the stored value is loop invariant. If not, we can't - // promote the memset. - CurLoop->isLoopInvariant(SplatValue)) { - // It looks like we can use SplatValue. - ForMemset = true; - return true; - } else if (HasMemsetPattern && - // Don't create memset_pattern16s with address spaces. - StorePtr->getType()->getPointerAddressSpace() == 0 && - (PatternValue = getMemSetPatternValue(StoredVal, DL))) { - // It looks like we can use PatternValue! - ForMemset = true; - return true; - } - - // Otherwise, see if the store can be turned into a memcpy. 
- if (HasMemcpy) { - // Check to see if the stride matches the size of the store. If so, then we - // know that every byte is touched in the loop. - unsigned Stride = getStoreStride(StoreEv); - unsigned StoreSize = getStoreSizeInBytes(SI, DL); - if (StoreSize != Stride && StoreSize != -Stride) - return false; - - // The store must be feeding a non-volatile load. - LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); - if (!LI || !LI->isSimple()) - return false; - - // See if the pointer expression is an AddRec like {base,+,1} on the current - // loop, which indicates a strided load. If we have something else, it's a - // random load we can't handle. - const SCEVAddRecExpr *LoadEv = - dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); - if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return false; - - // The store and load must share the same stride. - if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) - return false; - - // Success. This store can be converted into a memcpy. - ForMemcpy = true; - return true; - } - // This store can't be transformed into a memset/memcpy. - return false; + return true; } void LoopIdiomRecognize::collectStores(BasicBlock *BB) { - StoreRefsForMemset.clear(); - StoreRefsForMemcpy.clear(); + StoreRefs.clear(); for (Instruction &I : *BB) { StoreInst *SI = dyn_cast<StoreInst>(&I); if (!SI) continue; - bool ForMemset = false; - bool ForMemcpy = false; // Make sure this is a strided store with a constant stride. - if (!isLegalStore(SI, ForMemset, ForMemcpy)) + if (!isLegalStore(SI)) continue; // Save the store locations. - if (ForMemset) - StoreRefsForMemset.push_back(SI); - else if (ForMemcpy) - StoreRefsForMemcpy.push_back(SI); + StoreRefs.push_back(SI); } } @@ -429,15 +357,9 @@ bool LoopIdiomRecognize::runOnLoopBlock( bool MadeChange = false; // Look for store instructions, which may be optimized to memset/memcpy. collectStores(BB); - - // Look for a single store which can be optimized into a memset. 
- for (auto &SI : StoreRefsForMemset) + for (auto &SI : StoreRefs) MadeChange |= processLoopStore(SI, BECount); - // Optimize the store into a memcpy, if it feeds an similarly strided load. - for (auto &SI : StoreRefsForMemcpy) - MadeChange |= processLoopStoreOfLoopLoad(SI, BECount); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. @@ -458,7 +380,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( return MadeChange; } -/// processLoopStore - See if this store can be promoted to a memset. +/// processLoopStore - See if this store can be promoted to a memset or memcpy. bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { assert(SI->isSimple() && "Expected only non-volatile stores."); @@ -476,8 +398,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { bool NegStride = StoreSize == -Stride; // See if we can optimize just this store in isolation. - return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), - StoredVal, SI, StoreEv, BECount, NegStride); + if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), + StoredVal, SI, StoreEv, BECount, NegStride)) + return true; + + // Optimize the store into a memcpy, if it feeds an similarly strided load. + return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride); } /// processLoopMemSet - See if this memset can be promoted to a large memset. @@ -570,19 +496,37 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride) { + + // If the stored value is a byte-wise value (like i32 -1), then it may be + // turned into a memset of i8 -1, assuming that all the consecutive bytes + // are stored. 
A store of i32 0x01020304 can never be turned into a memset, + // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; + unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); - if (!SplatValue) - PatternValue = getMemSetPatternValue(StoredVal, DL); - - assert((SplatValue || PatternValue) && - "Expected either splat value or pattern value."); + // If we're allowed to form a memset, and the stored value would be acceptable + // for memset, use it. + if (SplatValue && TLI->has(LibFunc::memset) && + // Verify that the stored value is loop invariant. If not, we can't + // promote the memset. + CurLoop->isLoopInvariant(SplatValue)) { + // Keep and use SplatValue. + PatternValue = nullptr; + } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && + (PatternValue = getMemSetPatternValue(StoredVal, DL))) { + // Don't create memset_pattern16s with address spaces. + // It looks like we can use PatternValue! + SplatValue = nullptr; + } else { + // Otherwise, this isn't an idiom we can transform. For example, we can't + // do anything with a 3-byte store. + return false; + } // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. - unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, *DL, "loop-idiom"); @@ -664,25 +608,29 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. 
This kicks in for stuff like /// for (i) A[i] = B[i]; -bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, - const SCEV *BECount) { - assert(SI->isSimple() && "Expected only non-volatile stores."); - - Value *StorePtr = SI->getPointerOperand(); - const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); - unsigned Stride = getStoreStride(StoreEv); - unsigned StoreSize = getStoreSizeInBytes(SI, DL); - bool NegStride = StoreSize == -Stride; +bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( + StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv, + const SCEV *BECount, bool NegStride) { + // If we're not allowed to form memcpy, we fail. + if (!TLI->has(LibFunc::memcpy)) + return false; // The store must be feeding a non-volatile load. - LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); - assert(LI->isSimple() && "Expected only non-volatile stores."); + LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); + if (!LI || !LI->isSimple()) + return false; // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a // random load we can't handle. const SCEVAddRecExpr *LoadEv = - cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + + // The store and load must share the same stride. + if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) + return false; // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the