remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later
author Sanjay Patel <spatel@rotateright.com>
Fri, 21 Aug 2015 20:39:17 +0000 (20:39 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Fri, 21 Aug 2015 20:39:17 +0000 (20:39 +0000)
See discussion in D12154 ( http://reviews.llvm.org/D12154 ), AMD Software
Optimization Guides for 10H/12H/15H/16H, and Agner Fog's experimental data.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245733 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86.td
test/CodeGen/X86/slow-unaligned-mem.ll

index d081614a821d84612fa280bc91e06b812847ff2b..d00a1113e2e4085962f57ebf48a7ee500427f4fc 100644 (file)
@@ -433,21 +433,19 @@ def : Proc<"opteron-sse3",    [FeatureSlowUAMem, FeatureSSE3,   Feature3DNowA,
 def : Proc<"athlon64-sse3",   [FeatureSlowUAMem, FeatureSSE3,   Feature3DNowA,
                                FeatureCMPXCHG16B, FeatureSlowBTMem,
                                FeatureSlowSHLD]>;
-def : Proc<"amdfam10",        [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"amdfam10",        [FeatureSSE4A,
                                Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
                                FeaturePOPCNT, FeatureSlowBTMem,
                                FeatureSlowSHLD]>;
-def : Proc<"barcelona",       [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"barcelona",       [FeatureSSE4A,
                                Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
                                FeaturePOPCNT, FeatureSlowBTMem,
                                FeatureSlowSHLD]>;
 
-// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here.
-
 // Bobcat
 def : Proc<"btver1",          [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
                                FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
-                               FeatureSlowSHLD, FeatureSlowUAMem]>;
+                               FeatureSlowSHLD]>;
 
 // Jaguar
 def : ProcessorModel<"btver2", BtVer2Model,
@@ -461,15 +459,13 @@ def : ProcessorModel<"btver2", BtVer2Model,
 def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                                FeatureAVX, FeatureSSE4A, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureSlowSHLD,
-                               FeatureSlowUAMem]>;
+                               FeaturePOPCNT, FeatureSlowSHLD]>;
 // Piledriver
 def : Proc<"bdver2",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                                FeatureAVX, FeatureSSE4A, FeatureF16C,
                                FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
-                               FeatureTBM, FeatureFMA, FeatureSlowSHLD,
-                               FeatureSlowUAMem]>;
+                               FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
 
 // Steamroller
 def : Proc<"bdver3",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
@@ -477,7 +473,7 @@ def : Proc<"bdver3",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAVX, FeatureSSE4A, FeatureF16C,
                                FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
                                FeatureTBM, FeatureFMA, FeatureSlowSHLD,
-                               FeatureFSGSBase, FeatureSlowUAMem]>;
+                               FeatureFSGSBase]>;
 
 // Excavator
 def : Proc<"bdver4",          [FeatureAVX2, FeatureXOP, FeatureFMA4,
@@ -485,7 +481,7 @@ def : Proc<"bdver4",          [FeatureAVX2, FeatureXOP, FeatureFMA4,
                                FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
                                FeaturePOPCNT, FeatureBMI, FeatureBMI2,
                                FeatureTBM, FeatureFMA, FeatureSSE4A,
-                               FeatureFSGSBase, FeatureSlowUAMem]>;
+                               FeatureFSGSBase]>;
 
 def : Proc<"geode",           [FeatureSlowUAMem, Feature3DNowA]>;
 
index e865ca16ca19b315fd7c31b59c3154e70407c757..5c8166b63a3ca772eb0f5fbad4a40dbf9a92b802 100644 (file)
 ; AMD chips with fast unaligned memory accesses
 ; FIXME: These are wrong except for btver2.
 
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10      2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona     2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1        2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10      2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona     2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2        2>&1 | FileCheck %s --check-prefix=FAST
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1        2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2        2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses