bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount);
+ const SCEV *BECount, bool NegStride);
/// @}
/// \name Noncountable Loop Idiom Handling
StoredVal, SI, StoreEv, BECount, NegStride))
return true;
- // TODO: We don't handle negative stride memcpys.
- if (NegStride)
- return false;
-
// If the stored value is a strided load in the same loop with the same stride
// this may be transformable into a memcpy. This kicks in for stuff like
// for (i) A[i] = B[i];
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
- if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount,
+ NegStride))
return true;
}
// errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
/// same-strided load.
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
+ const SCEVAddRecExpr *LoadEv, const SCEV *BECount, bool NegStride) {
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
+ const SCEV *StrStart = StoreEv->getStart();
+ unsigned StrAS = SI->getPointerAddressSpace();
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
+
+ // Handle negative strided loops.
+ if (NegStride)
+ StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
- StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
- Preheader->getTerminator());
+ StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
StoreSize, *AA, SI)) {
return false;
}
+ const SCEV *LdStart = LoadEv->getStart();
+ unsigned LdAS = LI->getPointerAddressSpace();
+
+ // Handle negative strided loops.
+ if (NegStride)
+ LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
+
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
Value *LoadBasePtr = Expander.expandCodeFor(
- LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
- Preheader->getTerminator());
+ LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
*AA, SI)) {
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
const SCEV *NumBytesS =
; CHECK: ret void
}
-; We don't handle memcpy-able loops with negative stride.
+; Handle memcpy-able loops with negative stride.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
entry:
%conv = sext i32 %c to i64
while.end: ; preds = %while.end.loopexit, %entry
ret i32* %0
; CHECK-LABEL: @test17(
-; CHECK-NOT: call void @llvm.memcpy
+; CHECK: call void @llvm.memcpy
; CHECK: ret i32*
}
declare noalias i8* @malloc(i64)
+
+; Handle memcpy-able loops with negative stride.
+; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
+; for (int i = 2047; i >= 0; --i) {
+; a[i] = b[i];
+; }
+; }
+define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+; CHECK-LABEL: @test18(
+; CHECK: call void @llvm.memcpy
+; CHECK: ret
+}