};
SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
+ // Track loads out of this alloca which cannot, for any reason, be pre-split.
+ // This is important as we also cannot pre-split stores of those loads!
+ // FIXME: This is all pretty gross. It means that we can be more aggressive
+ // in pre-splitting when the load feeding the store happens to come from
+ // a separate alloca. Put another way, the effectiveness of SROA would be
+ // decreased by a frontend which just concatenated all of its local allocas
+ // into one big flat alloca. But defeating such patterns is exactly the job
+ // SROA is tasked with! Sadly, to not have this discrepancy we would have to
+ // change store pre-splitting to actually force pre-splitting of the load
+ // that feeds it *and all stores*. That makes pre-splitting much harder, but
+ // maybe it would make it more principled?
+ SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
+
DEBUG(dbgs() << " Searching for candidate loads and stores\n");
for (auto &P : AS.partitions()) {
for (Slice &S : P) {
- if (!S.isSplittable())
- continue;
- if (S.endOffset() <= P.endOffset())
+ Instruction *I = cast<Instruction>(S.getUse()->getUser());
+ if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
+ // If this is a load, we have to track that it can't participate in any
+ // pre-splitting!
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ UnsplittableLoads.insert(LI);
continue;
+ }
assert(P.endOffset() > S.beginOffset() &&
"Empty or backwards partition!");
// Determine if this is a pre-splittable slice.
- Instruction *I = cast<Instruction>(S.getUse()->getUser());
if (auto *LI = dyn_cast<LoadInst>(I)) {
assert(!LI->isVolatile() && "Cannot split volatile loads!");
}
return true;
};
- if (!IsLoadSimplyStored(LI))
+ if (!IsLoadSimplyStored(LI)) {
+ UnsplittableLoads.insert(LI);
continue;
+ }
Loads.push_back(LI);
} else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) {
// such loads and stores, we can only pre-split them if their splits exactly
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
- SmallPtrSet<LoadInst *, 4> BadSplitLoads;
Stores.erase(
std::remove_if(Stores.begin(), Stores.end(),
- [&BadSplitLoads, &SplitOffsetsMap](StoreInst *SI) {
+ [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
// Lookup the load we are storing in our map of split
// offsets.
auto *LI = cast<LoadInst>(SI->getValueOperand());
+ // If it was completely unsplittable, then we're done,
+ // and this store can't be pre-split.
+ if (UnsplittableLoads.count(LI))
+ return true;
+
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
if (LoadOffsetsI == SplitOffsetsMap.end())
- return false; // Unrelated loads are always safe.
+ return false; // Unrelated loads are definitely safe.
auto &LoadOffsets = LoadOffsetsI->second;
// Now lookup the store's offsets.
// with mismatched relative splits. Just give up on them
// and remove both instructions from our list of
// candidates.
- BadSplitLoads.insert(LI);
+ UnsplittableLoads.insert(LI);
return true;
}),
Stores.end());
+ // Now we have to go *back* through all the stores, because a later store may
+ // have caused an earlier store's load to become unsplittable; if the load is
+ // unsplittable for the later store, then we can't rely on it being split for
+ // the earlier store either.
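+ // For example, if an earlier store of load L splits compatibly but a later
+ // store of L has mismatched split offsets, L becomes unsplittable and the
+ // earlier store must be dropped as well (see the @PR22093.2 test).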
+ Stores.erase(std::remove_if(Stores.begin(), Stores.end(),
+ [&UnsplittableLoads](StoreInst *SI) {
+ auto *LI =
+ cast<LoadInst>(SI->getValueOperand());
+ return UnsplittableLoads.count(LI);
+ }),
+ Stores.end());
+ // Once we've established all the loads that can't be pre-split for some
+ // reason, filter out any that made it into our candidate list.
Loads.erase(std::remove_if(Loads.begin(), Loads.end(),
- [&BadSplitLoads](LoadInst *LI) {
- return BadSplitLoads.count(LI);
+ [&UnsplittableLoads](LoadInst *LI) {
+ return UnsplittableLoads.count(LI);
}),
Loads.end());
+
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
if (Loads.empty() && Stores.empty())
%ret = fadd float %f1, %f2
ret float %ret
}
+
+define i32 @PR22093() {
+; Test that we don't try to pre-split a splittable store of a splittable but
+; not pre-splittable load over the same alloca. When the load is unsplittable
+; but unrelated to this alloca, we "handle" this case by just generating extra
+; loads without touching the original one. But when the original load comes
+; from this same alloca, we need to handle it specially to ensure the splits
+; line up properly for rewriting.
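+; Here the load %load also feeds the return, so it has a non-store use: it is
+; not "simply stored", lands in UnsplittableLoads, and the store of it over
+; the same alloca must be dropped from the pre-split candidates as well.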
+;
+; CHECK-LABEL: @PR22093(
+; CHECK-NOT: alloca
+; CHECK: alloca i16
+; CHECK-NOT: alloca
+; CHECK: store volatile i16
+
+entry:
+ %a = alloca i32
+ %a.cast = bitcast i32* %a to i16*
+ store volatile i16 42, i16* %a.cast
+ %load = load i32* %a
+ store i32 %load, i32* %a
+ ret i32 %load
+}
+
+define void @PR22093.2() {
+; Another way we can end up unable to pre-split a particular set of loads and
+; stores, and one where ordering matters. Here we have a load which is
+; pre-splittable by itself, and the first store is also compatible. But the
+; second store of the load makes the load unsplittable because its splits
+; don't match the load's. Because this makes the load unsplittable, we also
+; have to go back and remove the first store from the pre-split candidates, as
+; its load won't be pre-split.
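+; Concretely: the volatile i16 covers bytes [0,2) of the i64 alloca, so the
+; i32 load of [0,4) pre-splits at relative offset 2, as does the first i32
+; store of [0,4). The second i32 store covers [4,8), where the volatile i8 at
+; [4,5) forces a split at relative offset 1, mismatching the load's offset 2.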
+;
+; CHECK-LABEL: @PR22093.2(
+; CHECK-NOT: alloca
+; CHECK: alloca i16
+; CHECK-NEXT: alloca i8
+; CHECK-NOT: alloca
+; CHECK: store volatile i16
+; CHECK: store volatile i8
+
+entry:
+ %a = alloca i64
+ %a.cast1 = bitcast i64* %a to i32*
+ %a.cast2 = bitcast i64* %a to i16*
+ store volatile i16 42, i16* %a.cast2
+ %load = load i32* %a.cast1
+ store i32 %load, i32* %a.cast1
+ %a.gep1 = getelementptr i32* %a.cast1, i32 1
+ %a.cast3 = bitcast i32* %a.gep1 to i8*
+ store volatile i8 13, i8* %a.cast3
+ store i32 %load, i32* %a.gep1
+ ret void
+}