Fix two issues in MergeConsecutiveStores:

author James Y Knight <jyknight@google.com>

Mon, 2 Nov 2015 18:48:08 +0000 (18:48 +0000)

committer James Y Knight <jyknight@google.com>

Mon, 2 Nov 2015 18:48:08 +0000 (18:48 +0000)
author James Y Knight <jyknight@google.com>
Mon, 2 Nov 2015 18:48:08 +0000 (18:48 +0000)
committer James Y Knight <jyknight@google.com>
Mon, 2 Nov 2015 18:48:08 +0000 (18:48 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 31f0df99d47dbbf9d22b848ae34d8d1bed38f883..a790282fada477f76614f31276faa92b7f5c2f37 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11165,6 +11165,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
      if (Index->getMemoryVT() != MemVT)
        break;
  
+    // We do not allow under-aligned stores in order to prevent
+    // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
+    // be irrelevant here; what MATTERS is that we not move memory
+    // operations that potentially overlap past each-other.
+    if (Index->getAlignment() < MemVT.getStoreSize())
+      break;
+
      // We found a potential memory operand to merge.
      StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
  
@@ -11249,12 +11256,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
    if (StoreNodes.size() < 2)
      return false;
  
-  // Sort the memory operands according to their distance from the base pointer.
+  // Sort the memory operands according to their distance from the
+  // base pointer.  As a secondary criteria: make sure stores coming
+  // later in the code come first in the list. This is important for
+  // the non-UseAA case, because we're merging stores into the FINAL
+  // store along a chain which potentially contains aliasing stores.
+  // Thus, if there are multiple stores to the same address, the last
+  // one can be considered for merging but not the others.
    std::sort(StoreNodes.begin(), StoreNodes.end(),
              [](MemOpLink LHS, MemOpLink RHS) {
      return LHS.OffsetFromBase < RHS.OffsetFromBase ||
             (LHS.OffsetFromBase == RHS.OffsetFromBase &&
-            LHS.SequenceNum > RHS.SequenceNum);
+            LHS.SequenceNum < RHS.SequenceNum);
    });
  
    // Scan the memory operations on the chain and find the first non-consecutive
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll

index 6670bbe02772cd134c34fd545379d06863ec05d5..70af4184e8a28c30f32e5b4adf74a3733a9aed4e 100644 (file)
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -288,12 +288,16 @@ block4:                                       ; preds = %4, %.lr.ph
    ret void
  }
  
-;; On x86, even unaligned copies can be merged to vector ops.
+;; On x86, even unaligned copies should be merged to vector ops.
+;; TODO: however, this cannot happen at the moment, due to brokenness
+;; in MergeConsecutiveStores. See UseAA FIXME in DAGCombiner.cpp
+;; visitSTORE.
+
  ; CHECK-LABEL: merge_loads_no_align:
  ;  load:
-; CHECK: vmovups
+; CHECK-NOT: vmovups ;; TODO
  ;  store:
-; CHECK: vmovups
+; CHECK-NOT: vmovups ;; TODO
  ; CHECK: ret
  define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    %a1 = icmp sgt i32 %count, 0
diff --git a/test/CodeGen/X86/dag-merge-fast-accesses.ll b/test/CodeGen/X86/dag-merge-fast-accesses.ll

index c5bb34013add12a15f6ca7f8734d75aa34421007..49631ffa77d58fb8bc97f06bd39d72fec2b6c42d 100644 (file)
--- a/test/CodeGen/X86/dag-merge-fast-accesses.ll
+++ b/test/CodeGen/X86/dag-merge-fast-accesses.ll
@@ -50,11 +50,19 @@ define void @merge_vec_element_store(<4 x double> %v, double* %ptr) {
  }
  
  
+;; TODO: FAST *should* be:
+;;    movups (%rdi), %xmm0
+;;    movups %xmm0, 40(%rdi)
+;; ..but is not currently. See the UseAA FIXME in DAGCombiner.cpp
+;; visitSTORE.
+
  define void @merge_vec_load_and_stores(i64 *%ptr) {
  ; FAST-LABEL: merge_vec_load_and_stores:
  ; FAST:       # BB#0:
-; FAST-NEXT:    movups (%rdi), %xmm0
-; FAST-NEXT:    movups %xmm0, 40(%rdi)
+; FAST-NEXT:    movq (%rdi), %rax
+; FAST-NEXT:    movq 8(%rdi), %rcx
+; FAST-NEXT:    movq %rax, 40(%rdi)
+; FAST-NEXT:    movq %rcx, 48(%rdi)
  ; FAST-NEXT:    retq
  ;
  ; SLOW-LABEL: merge_vec_load_and_stores:
diff --git a/test/CodeGen/X86/stores-merging.ll b/test/CodeGen/X86/stores-merging.ll

index d6daa573b4ae180e2b2b58c7dd1cdef6c72799b9..9e479bd71b98b82b7c1ab640e192318530b39476 100644 (file)
--- a/test/CodeGen/X86/stores-merging.ll
+++ b/test/CodeGen/X86/stores-merging.ll
@@ -7,17 +7,51 @@ target triple = "x86_64-unknown-linux-gnu"
  
  @e = common global %structTy zeroinitializer, align 4
  
-; CHECK-LABEL: f
-define void @f() {
-entry:
+;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
+;; store operations.  The first test stores in increasing address
+;; order, the second in decreasing -- but in both cases should have
+;; the same result in memory in the end.
  
-; CHECK:   movabsq     $528280977409, %rax
+; CHECK-LABEL: redundant_stores_merging:
+; CHECK:   movl    $123, e+8(%rip)
+; CHECK:   movabsq $1958505086977, %rax
  ; CHECK:   movq    %rax, e+4(%rip)
-; CHECK:   movl    $456, e+8(%rip)
-
+define void @redundant_stores_merging() {
+entry:
    store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
    store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
    store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
    ret void
  }
  
+;; This variant tests PR25154.
+; CHECK-LABEL: redundant_stores_merging_reverse:
+; CHECK:   movl    $123, e+8(%rip)
+; CHECK:   movabsq $1958505086977, %rax
+; CHECK:   movq    %rax, e+4(%rip)
+define void @redundant_stores_merging_reverse() {
+entry:
+  store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
+  store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
+  store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
+  ret void
+}
+
+@b = common global [8 x i8] zeroinitializer, align 2
+
+;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
+;; these must not be reordered in MergeConsecutiveStores such that the
+;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
+;; a movl, after the store to 3).
+
+;; CHECK-LABEL: overlapping_stores_merging:
+;; CHECK:  movw    $0, b+2(%rip)
+;; CHECK:  movw    $2, b+3(%rip)
+;; CHECK:  movw    $1, b(%rip)
+define void @overlapping_stores_merging() {
+entry:
+  store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2
+  store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1
+  store i16 1, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 0) to i16*), align 2
+  ret void
+}
author	James Y Knight <jyknight@google.com>
	Mon, 2 Nov 2015 18:48:08 +0000 (18:48 +0000)
committer	James Y Knight <jyknight@google.com>
	Mon, 2 Nov 2015 18:48:08 +0000 (18:48 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/X86/MergeConsecutiveStores.ll		patch \| blob \| history
test/CodeGen/X86/dag-merge-fast-accesses.ll		patch \| blob \| history
test/CodeGen/X86/stores-merging.ll		patch \| blob \| history