[EarlyCSE] DSE of stores which write back loaded values

author Philip Reames <listmail@philipreames.com>

Wed, 16 Dec 2015 01:01:30 +0000 (01:01 +0000)

committer Philip Reames <listmail@philipreames.com>

Wed, 16 Dec 2015 01:01:30 +0000 (01:01 +0000)
author Philip Reames <listmail@philipreames.com>
Wed, 16 Dec 2015 01:01:30 +0000 (01:01 +0000)
committer Philip Reames <listmail@philipreames.com>
Wed, 16 Dec 2015 01:01:30 +0000 (01:01 +0000)
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp

index 6fa194e57092f3576a9b7662467da9b976c7f810..03f8c05c9d32837a42a587654fda21994f29e62b 100644 (file)
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -687,6 +687,33 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
          continue;
        }
  
+    // Write-back DSE - If we store back the same value we just loaded from
+    // the same location and haven't passed any intervening writes or ordering
+    // operations, we can remove the store.  The primary benefit is that the
+    // available load table remains valid, so values can be forwarded past the
+    // point where the store originally was.
+    if (MemInst.isValid() && MemInst.isStore()) {
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.Data &&
+          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+          InVal.Generation == CurrentGeneration &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing stores with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered()) {
+        assert((!LastStore ||
+                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+                MemInst.getPointerOperand()) &&
+               "can't have an intervening store!");
+        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumDSE;
+        // We can avoid incrementing the generation count since we were able
+        // to eliminate this store.
+        continue;
+      }
+    }
+
      // Okay, this isn't something we can CSE at all.  Check to see if it is
      // something that could modify memory.  If so, our available memory values
      // cannot be used so bump the generation count.
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll

index 43b5e6098f6a19016859b5544ad79ec0ab14d35b..8c9b74b4d0e1dc2537ec760a559e23ba36b55b52 100644 (file)
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -203,3 +203,77 @@ define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
    ; CHECK: load i32, i32* %P1
    ; CHECK: load i32, i32* %P1
  }
+
+define void @dse1(i32 *%P) {
+; CHECK-LABEL: @dse1
+; CHECK-NOT: store
+  %v = load i32, i32* %P
+  store i32 %v, i32* %P
+  ret void
+}
+
+define void @dse2(i32 *%P) {
+; CHECK-LABEL: @dse2
+; CHECK-NOT: store
+  %v = load atomic i32, i32* %P seq_cst, align 4
+  store i32 %v, i32* %P
+  ret void
+}
+
+define void @dse3(i32 *%P) {
+; CHECK-LABEL: @dse3
+; CHECK-NOT: store
+  %v = load atomic i32, i32* %P seq_cst, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  ret void
+}
+
+define i32 @dse4(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse4
+; CHECK-NOT: store
+; CHECK: ret i32 0
+  %a = load i32, i32* %Q
+  %v = load atomic i32, i32* %P unordered, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  %b = load i32, i32* %Q
+  %res = sub i32 %a, %b
+  ret i32 %res
+}
+
+; Note that in this example, %P and %Q could in fact be the same
+; pointer.  %v could be different than the value observed for %a
+; and that's okay because we're using relaxed (unordered) ordering.
+; The only guarantee we have to provide is that each of the loads
+; has to observe some value written to that location.  We do
+; not have to respect the order in which those writes were done.
+define i32 @dse5(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse5
+; CHECK-NOT: store
+; CHECK: ret i32 0
+  %v = load atomic i32, i32* %P unordered, align 4
+  %a = load atomic i32, i32* %Q unordered, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  %b = load atomic i32, i32* %Q unordered, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+}
+
+
+define void @dse_neg1(i32 *%P) {
+; CHECK-LABEL: @dse_neg1
+; CHECK: store
+  %v = load i32, i32* %P
+  store i32 5, i32* %P
+  ret void
+}
+
+; We could remove the store, but only if its seq_cst ordering were
+; somehow still encoded.
+define void @dse_neg2(i32 *%P) {
+; CHECK-LABEL: @dse_neg2
+; CHECK: store
+  %v = load i32, i32* %P
+  store atomic i32 %v, i32* %P seq_cst, align 4
+  ret void
+}
+
author	Philip Reames <listmail@philipreames.com>
	Wed, 16 Dec 2015 01:01:30 +0000 (01:01 +0000)
committer	Philip Reames <listmail@philipreames.com>
	Wed, 16 Dec 2015 01:01:30 +0000 (01:01 +0000)
lib/Transforms/Scalar/EarlyCSE.cpp		patch \| blob \| history
test/Transforms/EarlyCSE/basic.ll		patch \| blob \| history