void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
Instruction *I) {
- // I = B + sext(Idx *nsw S) *nsw ElementSize
+ // I = B + sext(Idx *nsw S) * ElementSize
+ // = B + (sext(Idx) * sext(S)) * ElementSize
// = B + (sext(Idx) * ElementSize) * sext(S)
// Casting to IntegerType is safe because we skipped vector GEPs.
IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
// sext'ed multiplication.
if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
// SLSR is currently unsafe if i * S may overflow.
- // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize
+ // GEP = Base + sext(LHS *nsw RHS) * ElementSize
allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
}
}
case Candidate::GEP:
{
Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
+ bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
if (BumpWithUglyGEP) {
// C = (char *)Basis + Bump
unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
- // We only considered inbounds GEP as candidates.
- Reduced = Builder.CreateInBoundsGEP(Reduced, Bump);
+ if (InBounds)
+ Reduced = Builder.CreateInBoundsGEP(Reduced, Bump);
+ else
+ Reduced = Builder.CreateGEP(Reduced, Bump);
Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
} else {
// C = gep Basis, Bump
// Canonicalize bump to pointer size.
Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
- Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump);
+ if (InBounds)
+ Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump);
+ else
+ Reduced = Builder.CreateGEP(Basis.Ins, Bump);
}
}
break;
%2 = add i64 %1, %v2
ret i64 %2
}
+
+; Exercises SLSR on GEPs that lack the inbounds flag: none of the three
+; getelementptr instructions in this function is marked inbounds. The
+; expected output keeps %p1 unchanged and rewrites %p2 as a plain
+; (non-inbounds) getelementptr based on %p1.
+define i32 @slsr_out_of_bounds_gep(i32* %input, i32 %s) {
+; CHECK-LABEL: @slsr_out_of_bounds_gep(
+ ; v0 = input[0];
+ %p0 = getelementptr i32, i32* %input, i64 0
+ %v0 = load i32, i32* %p0
+
+ ; v1 = input[(long)s];
+ %t = sext i32 %s to i64
+ %p1 = getelementptr i32, i32* %input, i64 %t
+; CHECK: %p1 = getelementptr i32, i32* %input, i64 %t
+ %v1 = load i32, i32* %p1
+
+ ; v2 = input[(long)(s * 2)];
+ ; Because the multiply is nsw, %t2 equals 2 * %t, so &input[%t2] is the
+ ; same address as %p1 advanced by another %t elements.
+ %s2 = mul nsw i32 %s, 2
+ %t2 = sext i32 %s2 to i64
+ %p2 = getelementptr i32, i32* %input, i64 %t2
+; CHECK: %p2 = getelementptr i32, i32* %p1, i64 %t
+ %v2 = load i32, i32* %p2
+
+ ; return v0 + v1 + v2;
+ %1 = add i32 %v0, %v1
+ %2 = add i32 %1, %v2
+ ret i32 %2
+}