DAGCombiner: Don't stop finding better chain on 2 aliases

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 13 Oct 2015 00:49:00 +0000 (00:49 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 13 Oct 2015 00:49:00 +0000 (00:49 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 13 Oct 2015 00:49:00 +0000 (00:49 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 13 Oct 2015 00:49:00 +0000 (00:49 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index a26f378b49aaee0ece7421a8d35b7dfa7b5df92e..b9b37fd0be85a54fff8a057455630dcb0480c1ba 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14298,14 +14298,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
      SDValue Chain = Chains.pop_back_val();
  
      // For TokenFactor nodes, look at each operand and only continue up the
-    // chain until we find two aliases.  If we've seen two aliases, assume we'll
-    // find more and revert to original chain since the xform is unlikely to be
-    // profitable.
+    // chain until we reach the depth limit.
      //
      // FIXME: The depth check could be made to return the last non-aliasing
      // chain we found before we hit a tokenfactor rather than the original
      // chain.
-    if (Depth > 6 || Aliases.size() == 2) {
+    if (Depth > 6) {
        Aliases.clear();
        Aliases.push_back(OriginalChain);
        return;
diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll

index 62d372a398aa668c56bd0bbbaa5bd1da7a69aa0c..fac043e85246f949e16fc997d4f6a88aed1c46d7 100644 (file)
--- a/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/test/CodeGen/AMDGPU/merge-stores.ll
@@ -1,5 +1,8 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
+
+; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
  
  ; Run with devices with different unaligned load restrictions.
  
@@ -151,11 +154,15 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
  
  ; FIXME: Should be able to merge this
  ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword
-; GCN: buffer_store_dword
-; GCN: buffer_store_dword
-; GCN: buffer_store_dword
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+
+; GCN-AA: buffer_store_dwordx2
+; GCN-AA: buffer_store_dword v
+; GCN-AA: buffer_store_dword v
+
  ; GCN: s_endpgm
  define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
    %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
@@ -484,11 +491,15 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1
  ; This works once AA is enabled on the subtarget
  ; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
  ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
-; XGCN: buffer_store_dwordx4 [[LOAD]]
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
+
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+
+; GCN-AA: buffer_store_dwordx4 [[LOAD]]
+
+; GCN: s_endpgm
  define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
    %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
    %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
@@ -606,18 +617,23 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
    ret void
  }
  
+; FIXME: This should do 2 dwordx4 stores
  ; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
-; XGCN: buffer_store_dwordx4
-; XGCN: buffer_store_dwordx4
  
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+
+; GCN-AA: buffer_store_dwordx4
+; GCN-AA: buffer_store_dwordx2
+; GCN-AA: buffer_store_dwordx2
+
+; GCN: s_endpgm
  define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
    store i32 34, i32 addrspace(1)* %out, align 4
    %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 13 Oct 2015 00:49:00 +0000 (00:49 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 13 Oct 2015 00:49:00 +0000 (00:49 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/merge-stores.ll		patch \| blob \| history