}
namespace {
- struct SCEVLess : std::binary_function<const SCEV *, const SCEV *, bool>
- {
- SCEVLess(ScalarEvolution *SE) : SE(SE) {}
+ struct BucketElement {
+ BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
- bool operator() (const SCEV *X, const SCEV *Y) const {
- const SCEV *Diff = SE->getMinusSCEV(X, Y);
- return cast<SCEVConstant>(Diff)->getValue()->getSExtValue() < 0;
- }
+ const SCEVConstant *Offset;
+ Instruction *Instr;
+ };
- protected:
- ScalarEvolution *SE;
+ struct Bucket {
+ Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
+ Elements(1, BucketElement(I)) {}
+
+ const SCEV *BaseSCEV;
+ SmallVector<BucketElement, 16> Elements;
};
}
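For context, a minimal sketch of the failure mode this restructuring removes
(illustrative only, not part of the patch). The old multimap comparator ran
cast<SCEVConstant>(SE->getMinusSCEV(X, Y)) unconditionally, but ScalarEvolution
only folds a subtraction to a constant opportunistically, so the cast could
assert in the middle of a map lookup. The helper below, with a hypothetical
name, shows the safe query pattern the new bucket code uses instead:

  // Sketch: derive a constant byte offset between two address SCEVs, if any.
  // Assumes SE is a valid ScalarEvolution*; X and Y are pointer SCEVs.
  static bool getConstantDiff(ScalarEvolution *SE, const SCEV *X,
                              const SCEV *Y, int64_t &Off) {
    const SCEV *Diff = SE->getMinusSCEV(X, Y);
    // dyn_cast, not cast: the difference of unrelated bases stays symbolic,
    // and X - Y may fold to a constant even when Y - X does not.
    if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
      Off = CDiff->getValue()->getSExtValue();
      return true;
    }
    return false;
  }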
  unsigned HeaderLoopPredCount =
    std::distance(pred_begin(Header), pred_end(Header));
// Collect buckets of comparable addresses used by loads and stores.
- typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
SmallVector<Bucket, 16> Buckets;
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I) {
}
bool FoundBucket = false;
- for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
- for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end();
- K != KE; ++K) {
- const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV);
- if (isa<SCEVConstant>(Diff)) {
- Buckets[i].insert(std::make_pair(LSCEV, MemI));
- FoundBucket = true;
- break;
- }
+ for (auto &B : Buckets) {
+ const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
+ if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
+ B.Elements.push_back(BucketElement(CDiff, MemI));
+ FoundBucket = true;
+ break;
}
+ }
if (!FoundBucket) {
- Buckets.push_back(Bucket(SCEVLess(SE)));
- Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI));
+ if (Buckets.size() == MaxVars)
+ return MadeChange;
+ Buckets.push_back(Bucket(LSCEV, MemI));
}
}
}
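To make the new representation concrete, consider a hypothetical loop that
loads a[i], a[i+1], and a[i+4] from an array of 8-byte elements: the
collection loop above produces a single Bucket whose BaseSCEV is the address
recurrence of the first access, with Elements carrying constant byte offsets
{null, 8, 32} (the base element's Offset is left null by the one-argument
BucketElement constructor). An address stream whose difference from every
existing base is symbolic starts a new Bucket, up to the MaxVars limit.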
- if (Buckets.empty() || Buckets.size() > MaxVars)
+ if (Buckets.empty())
return MadeChange;
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
// The base address of each bucket is transformed into a phi and the others
// are rewritten as offsets of that variable.
+ // We have a choice now of which instruction's memory operand we use as the
+ // base for the generated PHI. Always picking the first instruction in each
+ // bucket does not work well, specifically because that instruction might
+ // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
+ // the choice is somewhat arbitrary, because the backend will happily
+ // generate direct offsets from both the pre-incremented and
+ // post-incremented pointer values. Thus, we'll pick the first non-prefetch
+ // instruction in each bucket, and adjust the recurrence and other offsets
+ // accordingly.
+ for (unsigned j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
+ if (II->getIntrinsicID() == Intrinsic::prefetch)
+ continue;
+
+ // If we'd otherwise pick the first element anyway, there's nothing to do.
+ if (j == 0)
+ break;
+
+ // If our chosen element has no offset from the base pointer, there's
+ // nothing to do.
+ if (!Buckets[i].Elements[j].Offset ||
+ Buckets[i].Elements[j].Offset->isZero())
+ break;
+
+ const SCEV *Offset = Buckets[i].Elements[j].Offset;
+ Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
+ for (auto &E : Buckets[i].Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
+ break;
+ }
+
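A worked example of the rebasing above, with hypothetical offsets: suppose
element 0 is a prefetch and the first non-prefetch element j carries Offset
16, the bucket holding offsets {null, 8, 16}. BaseSCEV is advanced by 16, the
offsets are rewritten relative to the new base as {-16, -8, 0}, and element j
is swapped to the front so that the PHI is built around a real load or store
rather than a dcbt prefetch.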
const SCEVAddRecExpr *BasePtrSCEV =
- cast<SCEVAddRecExpr>(Buckets[i].begin()->first);
+ cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
if (!BasePtrSCEV->isAffine())
continue;
assert(BasePtrSCEV->getLoop() == L &&
"AddRec for the wrong loop?");
- Instruction *MemI = Buckets[i].begin()->second;
+ // The instruction corresponding to the Bucket's BaseSCEV must be the first
+ // in the vector of elements.
+ Instruction *MemI = Buckets[i].Elements.begin()->Instr;
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
BasePtr->replaceAllUsesWith(NewBasePtr);
RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
- Value *LastNewPtr = NewBasePtr;
- for (Bucket::iterator I = std::next(Buckets[i].begin()),
- IE = Buckets[i].end(); I != IE; ++I) {
- Value *Ptr = GetPointerOperand(I->second);
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+ NewPtrs.insert(NewBasePtr);
+
+ for (auto I = std::next(Buckets[i].Elements.begin()),
+ IE = Buckets[i].Elements.end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->Instr);
assert(Ptr && "No pointer operand");
- if (Ptr == LastNewPtr)
+ if (NewPtrs.count(Ptr))
continue;
Instruction *RealNewPtr;
- const SCEVConstant *Diff =
- cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV));
- if (Diff->isZero()) {
+ if (!I->Offset || I->Offset->getValue()->isZero()) {
RealNewPtr = NewBasePtr;
} else {
      Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
      // Pick an insertion point for the new GEP: after PtrInc if the original
      // pointer shares a block with the new base, at the first insertion
      // point after a PHI, or just before the memory instruction otherwise.
      if (PtrIP && isa<Instruction>(NewBasePtr) &&
          cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
        PtrIP = nullptr;
      else if (PtrIP && isa<PHINode>(PtrIP))
        PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
      else if (!PtrIP)
- PtrIP = I->second;
+ PtrIP = I->Instr;
GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
- I8Ty, PtrInc, Diff->getValue(),
- I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
+ I8Ty, PtrInc, I->Offset->getValue(),
+ I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
if (!PtrIP)
NewPtr->insertAfter(cast<Instruction>(PtrInc));
NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
Ptr->replaceAllUsesWith(ReplNewPtr);
RecursivelyDeleteTriviallyDeadInstructions(Ptr);
- LastNewPtr = RealNewPtr;
+ NewPtrs.insert(RealNewPtr);
}
MadeChange = true;
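The net effect on a bucket, sketched informally: the chosen base access ends
up addressed through a PHI of the preheader address and the pointer advanced
by the loop stride, while every other element becomes an i8 GEP at its
(possibly negative) constant offset from that recurrence. On PowerPC the
recurrence is what later maps onto update-form (pre-increment) memory
instructions, which is the purpose of this preparation pass; the small
constant-offset GEPs fold into displacement operands.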
--- /dev/null
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64-i64:64-n32:64"
+target triple = "powerpc64le-linux"
+
+%struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144 = type <{ [220 x i8] }>
+
+@.BSS1 = external unnamed_addr global %struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144, align 32
+
+; Function Attrs: noinline nounwind
+define void @ety2_() #0 {
+
+; This test case used to crash because the preinc prep pass would assume that
+; if X-Y could be simplified to a constant, then so could Y-X. While not
+; desirable, we cannot actually make this guarantee: SCEV folds subtractions
+; opportunistically, so one ordering may simplify to a constant while the
+; other remains symbolic.
+; CHECK-LABEL: @ety2_
+
+L.entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = sext i32 %0 to i64
+ %2 = shl nsw i64 %1, 3
+ %3 = add nsw i64 %2, 8
+ br label %L.LB1_425
+
+L.LB1_425: ; preds = %L.LB1_427, %L.entry
+ %4 = phi i64 [ %21, %L.LB1_427 ], [ undef, %L.entry ]
+ br i1 undef, label %L.LB1_427, label %L.LB1_816
+
+L.LB1_816: ; preds = %L.LB1_425
+ switch i32 undef, label %L.LB1_432 [
+ i32 30, label %L.LB1_805
+ i32 10, label %L.LB1_451
+ i32 20, label %L.LB1_451
+ ]
+
+L.LB1_451: ; preds = %L.LB1_816, %L.LB1_816
+ unreachable
+
+L.LB1_432: ; preds = %L.LB1_816
+ %.in.31 = lshr i64 %4, 32
+ %5 = trunc i64 %.in.31 to i32
+ br i1 undef, label %L.LB1_769, label %L.LB1_455
+
+L.LB1_455: ; preds = %L.LB1_432
+ unreachable
+
+L.LB1_769: ; preds = %L.LB1_432
+ %6 = sext i32 %5 to i64
+ %7 = add nsw i64 %6, 2
+ %8 = add nsw i64 %6, -1
+ %9 = mul i64 %8, %1
+ %10 = add i64 %9, %7
+ %11 = shl i64 %10, 3
+ %12 = getelementptr i8, i8* undef, i64 %11
+ %13 = mul nsw i64 %6, %1
+ %14 = add i64 %7, %13
+ %15 = shl i64 %14, 3
+ %16 = getelementptr i8, i8* undef, i64 %15
+ br i1 undef, label %L.LB1_662, label %L.LB1_662.prol
+
+L.LB1_662.prol: ; preds = %L.LB1_662.prol, %L.LB1_769
+ %indvars.iv.next20.prol = add nuw nsw i64 undef, 1
+ br i1 undef, label %L.LB1_662, label %L.LB1_662.prol
+
+L.LB1_662: ; preds = %L.LB1_437.2, %L.LB1_662.prol, %L.LB1_769
+ %indvars.iv19 = phi i64 [ %indvars.iv.next20.3, %L.LB1_437.2 ], [ 0, %L.LB1_769 ], [ %indvars.iv.next20.prol, %L.LB1_662.prol ]
+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+ %17 = mul i64 %indvars.iv.next20, %3
+ %18 = getelementptr i8, i8* %16, i64 %17
+ %19 = bitcast i8* %18 to double*
+ store double 0.000000e+00, double* %19, align 8
+ %indvars.iv.next20.1 = add nsw i64 %indvars.iv19, 2
+ %20 = mul i64 %indvars.iv.next20.1, %3
+ br i1 undef, label %L.LB1_437.2, label %L.LB1_824.2
+
+L.LB1_427: ; preds = %L.LB1_425
+ %21 = load i64, i64* bitcast (i8* getelementptr inbounds (%struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144, %struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144* @.BSS1, i64 0, i32 0, i64 8) to i64*), align 8
+ br label %L.LB1_425
+
+L.LB1_805: ; preds = %L.LB1_816
+ ret void
+
+L.LB1_824.2: ; preds = %L.LB1_662
+ %22 = getelementptr i8, i8* %12, i64 %20
+ %23 = bitcast i8* %22 to double*
+ store double 0.000000e+00, double* %23, align 8
+ br label %L.LB1_437.2
+
+L.LB1_437.2: ; preds = %L.LB1_824.2, %L.LB1_662
+ %indvars.iv.next20.3 = add nsw i64 %indvars.iv19, 4
+ br label %L.LB1_662
+}
+
+attributes #0 = { noinline nounwind }
+