From: Stefanus Du Toit Date: Tue, 26 May 2009 21:04:35 +0000 (+0000) Subject: Update CPU capabilities for AMD machines X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8cf5ab153d3873d8565d5433527bd0219f7b8043;p=oota-llvm.git Update CPU capabilities for AMD machines - added processors k8-sse3, opteron-sse3, athlon64-sse3, amdfam10, and barcelona with appropriate sse3/4a levels - added FeatureSSE4A for amdfam10 processors in X86Subtarget: - added hasSSE4A - updated AutoDetectSubtargetFeatures to detect SSE4A - updated GetCurrentX86CPU to detect family 15 with sse3 as k8-sse3 and family 10h as amdfam10 New processor names match those used by gcc. Patch by Paul Redmond! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72434 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 0e2c98097ed..8df138d8d7a 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -52,6 +52,8 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; +def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", + "Support SSE 4a instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -97,6 +99,16 @@ def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, + Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, + Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index d54ccb37e3b..03ce1aee0e8 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -216,6 +216,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; + HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); } } @@ -229,6 +230,7 @@ static const char *GetCurrentX86CPU() { X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; + bool HasSSE3 = (ECX & 0x1); union { unsigned u[3]; @@ -311,10 +313,20 @@ static const char *GetCurrentX86CPU() { default: return "athlon"; } case 15: + if (HasSSE3) { + switch (Model) { + default: return "k8-sse3"; + } + } else { + switch (Model) { + case 1: return "opteron"; + case 5: return "athlon-fx"; // also opteron + default: return "athlon64"; + } + } + case 16: switch (Model) { - case 1: return "opteron"; - case 5: return "athlon-fx"; // also opteron - default: return "athlon64"; + default: return "amdfam10"; } default: return "generic"; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a5ca61704f6..46476f20400 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -67,6 +67,9 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; + /// HasSSE4A - True if the processor supports SSE4A instructions. + bool HasSSE4A; + /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. @@ -127,6 +130,7 @@ public: bool hasSSSE3() const { return X86SSELevel >= SSSE3; } bool hasSSE41() const { return X86SSELevel >= SSE41; } bool hasSSE42() const { return X86SSELevel >= SSE42; } + bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }