/// that dominated values can succeed in their lookup.
ScopedHTType AvailableValues;
- /// \brief A scoped hash table of the current values of loads.
+ /// A scoped hash table of the current values of previously encountered
+ /// memory locations.
///
- /// This allows us to get efficient access to dominating loads when we have
- /// a fully redundant load. In addition to the most recent load, we keep
- /// track of a generation count of the read, which is compared against the
- /// current generation count. The current generation count is incremented
+ /// This allows us to get efficient access to dominating loads or stores when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is incremented
/// after every possibly writing memory operation, which ensures that we only
- /// CSE loads with other loads that have no intervening store.
+ /// CSE loads with other loads that have no intervening store. Ordering
+ /// events (such as fences or atomic instructions) increment the generation
+ /// count as well; essentially, we model these as writes to all possible
+ /// locations. Note that atomic and/or volatile loads and stores can be
+ /// present in the table; it is the responsibility of the consumer to
+ /// inspect the atomicity/volatility if needed.
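+ ///
+ /// For example, given the following sequence (illustrative only):
+ ///   %a = load i32, i32* %p    ; recorded at generation G
+ ///   store i32 0, i32* %q      ; may write memory, generation becomes G+1
+ ///   %b = load i32, i32* %p    ; lookup finds %a at stale generation G,
+ ///                             ; so %b is not CSE'd with %a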
struct LoadValue {
Value *Data;
unsigned Generation;
int MatchingId;
- LoadValue() : Data(nullptr), Generation(0), MatchingId(-1) {}
- LoadValue(Value *Data, unsigned Generation, unsigned MatchingId)
- : Data(Data), Generation(Generation), MatchingId(MatchingId) {}
+ bool IsAtomic;
+ LoadValue()
+ : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+ LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+ bool IsAtomic)
+ : Data(Data), Generation(Generation), MatchingId(MatchingId),
+ IsAtomic(IsAtomic) {}
};
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<Value *, LoadValue>>
}
return Inst->isAtomic();
}
+ bool isAtomic() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ return Inst->isAtomic();
+ }
+ bool isUnordered() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return true;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->isUnordered();
+ }
+ // Conservative answer
+ return !Inst->isAtomic();
+ }
+
+ bool isVolatile() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isVolatile();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->isVolatile();
+ }
+ // Conservative answer
+ return true;
+ }
+
bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
return (getPointerOperand() == Inst.getPointerOperand() &&
getMatchingId() == Inst.getMatchingId());
ParseMemoryInst MemInst(Inst, TTI);
- // If this is a non-volatile load, process it.
+ // If this is a load (volatile or not), process it.
if (MemInst.isValid() && MemInst.isLoad()) {
- // Ignore volatile or ordered loads.
- if (!MemInst.isSimple()) {
+ // (Conservatively) we can't peek past the ordering implied by this
+ // operation, but we can add this load to our set of available values.
+ if (MemInst.isVolatile() || !MemInst.isUnordered()) {
LastStore = nullptr;
- // Don't CSE across synchronization boundaries.
- if (Inst->mayWriteToMemory())
- ++CurrentGeneration;
- continue;
+ ++CurrentGeneration;
}
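+ // Note that the load itself is still recorded below as an available
+ // value: forwarding from an atomic or volatile load to a later simple
+ // load of the same location is legal; we just can't CSE or DSE across
+ // the ordering the earlier load implies.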
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
- InVal.MatchingId == MemInst.getMatchingId()) {
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing loads with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered() &&
+ // We can't replace an atomic load with one which isn't also atomic.
+ InVal.IsAtomic >= MemInst.isAtomic()) {
Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
// Otherwise, remember that we have this instruction.
AvailableLoads.insert(
MemInst.getPointerOperand(),
- LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
LastStore = nullptr;
continue;
}
if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
- // location with no intervening loads. Delete the earlier store.
+ // location with no intervening loads. Delete the earlier store. Note
+ // that we can delete an earlier simple store even if the following one
+ // is an ordered, volatile, or atomic store.
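+ // For example, in:
+ //   store i32 0, i32* %p
+ //   store atomic i32 1, i32* %p unordered, align 4
+ // the first store is dead and can be deleted.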
if (LastStore) {
ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+ assert(LastStoreMemInst.isSimple() && "Violated invariant");
if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << *Inst << '\n');
// the store.
AvailableLoads.insert(
MemInst.getPointerOperand(),
- LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
// Remember that this was the last normal store we saw for DSE.
+ // Note that we can't delete an earlier atomic or volatile store in
+ // favor of a later one which isn't. We could in principle remove an
+ // earlier unordered store if the later one is also unordered.
if (MemInst.isSimple())
LastStore = Inst;
+ else
+ LastStore = nullptr;
}
}
}
--- /dev/null
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
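+; Can't CSE a simple load across a seq_cst atomic load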
+; CHECK-LABEL: @test12(
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+ %load0 = load i32, i32* %P1
+ %1 = load atomic i32, i32* %P2 seq_cst, align 4
+ %load1 = load i32, i32* %P1
+ %sel = select i1 %B, i32 %load0, i32 %load1
+ ret i32 %sel
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i1 %B, i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i1 %B, i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; Implementation restriction: we can't forward to a load with
+; ordering stronger than unordered
+define i32 @test15(i1 %B, i32* %P1, i32* %P2) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i1 %B, i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a seq_cst store (it acts as a full fence here)
+define void @test17(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test17
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ store atomic i32 0, i32* %P2 seq_cst, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove a volatile load
+define i32 @test18(i1 %B, i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test18
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't DSE a volatile store
+define void @test19(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test19
+; CHECK: store volatile
+; CHECK: store
+ store volatile i32 0, i32* %P1, align 4
+ store i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can value forward from volatile loads
+define i32 @test20(i1 %B, i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test20
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; It would be legal to DSE a non-volatile store in favor of a volatile one;
+; currently this is a missed optimization
+define void @test21(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test21
+; CHECK: store
+; CHECK: store volatile
+ store i32 0, i32* %P1, align 4
+ store volatile i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can DSE a normal store in favor of an unordered one
+define void @test22(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test22
+; CHECK-NEXT: store atomic
+ store i32 0, i32* %P1, align 4
+ store atomic i32 3, i32* %P1 unordered, align 4
+ ret void
+}