if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
return false;
- // Calculate a constant offset from the base pointer without using SCEV
- // in the supported cases.
- // TODO: Add support for the case where one of the pointers is a GEP that
- // uses the other pointer.
- GetElementPtrInst *GepA = dyn_cast<GetElementPtrInst>(PtrA);
- GetElementPtrInst *GepB = dyn_cast<GetElementPtrInst>(PtrB);
-
- unsigned BW = DL->getPointerSizeInBits(ASA);
+ unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- int64_t Sz = DL->getTypeStoreSize(Ty);
+ APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
- // Check if PtrA is the base and PtrB is a constant offset.
- if (GepB && GepB->getPointerOperand() == PtrA) {
- APInt Offset(BW, 0);
- if (GepB->accumulateConstantOffset(*DL, Offset))
- return Offset.getSExtValue() == Sz;
- return false;
- }
+ APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
- // Check if PtrB is the base and PtrA is a constant offset.
- if (GepA && GepA->getPointerOperand() == PtrB) {
- APInt Offset(BW, 0);
- if (GepA->accumulateConstantOffset(*DL, Offset))
- return Offset.getSExtValue() == -Sz;
- return false;
- }
+ APInt OffsetDelta = OffsetB - OffsetA;
- // If both pointers are GEPs:
- if (GepA && GepB) {
- // Check that they have the same base pointer and number of indices.
- if (GepA->getPointerOperand() != GepB->getPointerOperand() ||
- GepA->getNumIndices() != GepB->getNumIndices())
- return false;
+ // Check if they are based on the same pointer. That makes the offsets
+ // sufficient.
+ if (PtrA == PtrB)
+ return OffsetDelta == Size;
- // Try to strip the geps. This makes SCEV faster.
- // Make sure that all of the indices except for the last are identical.
- int LastIdx = GepA->getNumIndices();
- for (int i = 0; i < LastIdx - 1; i++) {
- if (GepA->getOperand(i+1) != GepB->getOperand(i+1))
- return false;
- }
-
- PtrA = GepA->getOperand(LastIdx);
- PtrB = GepB->getOperand(LastIdx);
- Sz = 1;
- }
-
- ConstantInt *CA = dyn_cast<ConstantInt>(PtrA);
- ConstantInt *CB = dyn_cast<ConstantInt>(PtrB);
- if (CA && CB) {
- return (CA->getSExtValue() + Sz == CB->getSExtValue());
- }
+ // Compute the delta that the two base pointers must have so that the final
+ // pointers (base plus accumulated offset) end up exactly Size apart.
+ APInt BaseDelta = Size - OffsetDelta;
- // Calculate the distance.
+ // Otherwise compute the distance with SCEV between the base pointers.
const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
- const SCEV *C = SE->getConstant(PtrSCEVA->getType(), Sz);
+ const SCEV *C = SE->getConstant(BaseDelta);
const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
return X == PtrSCEVB;
}
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.9.0"
ret i32 0
}
+define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
+; CHECK-LABEL: @test(
+;
+; Test that we correctly recognize discontiguous memory in arrays whose element
+; size is less than the element alignment, and that we do so through several
+; different GEP formations.
+;
+; The datalayout in this file gives x86_fp80 a 128-bit alignment (f80:128:128),
+; so neighbouring array elements are padded apart and element 1 does not start
+; where the stored bytes of element 0 end. The loads are therefore expected to
+; stay scalar and to be combined into a vector with insertelement.
+; NOTE(review): the run line passes -slp-threshold=-100, presumably so that the
+; cost model does not reject the tree before the loads are examined -- confirm.
+
+entry:
+; First pair: %i1 is loaded directly and through a GEP without inbounds.
+ %i1.0 = load x86_fp80* %i1, align 16
+ %i1.gep1 = getelementptr x86_fp80* %i1, i64 1
+ %i1.1 = load x86_fp80* %i1.gep1, align 16
+; CHECK: load x86_fp80*
+; CHECK: load x86_fp80*
+; CHECK: insertelement <2 x x86_fp80>
+; CHECK: insertelement <2 x x86_fp80>
+ br i1 undef, label %then, label %end
+
+then:
+; Second pair: both elements of %i2 are addressed through inbounds GEPs,
+; including an explicit GEP with index 0 for the first element.
+ %i2.gep0 = getelementptr inbounds x86_fp80* %i2, i64 0
+ %i2.0 = load x86_fp80* %i2.gep0, align 16
+ %i2.gep1 = getelementptr inbounds x86_fp80* %i2, i64 1
+ %i2.1 = load x86_fp80* %i2.gep1, align 16
+; CHECK: load x86_fp80*
+; CHECK: load x86_fp80*
+; CHECK: insertelement <2 x x86_fp80>
+; CHECK: insertelement <2 x x86_fp80>
+ br label %end
+
+end:
+; Merge the two pairs and write them to elements 0 and 1 of %o. The two scalar
+; phis are expected to become a single vector phi whose lanes are extracted
+; again afterwards.
+ %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]
+ %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]
+; CHECK: phi <2 x x86_fp80>
+; CHECK: extractelement <2 x x86_fp80>
+; CHECK: extractelement <2 x x86_fp80>
+ store x86_fp80 %phi0, x86_fp80* %o, align 16
+ %o.gep1 = getelementptr inbounds x86_fp80* %o, i64 1
+ store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
+ ret void
+}