From 0f31d547ebc0f302085ff0046cdfae99710b0f76 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 16 Oct 2015 06:03:09 +0000 Subject: [PATCH] [X86] Add fxsr feature flag for fxsave/fxrestore instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250497 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 88 +++++++++++++++--------- lib/Target/X86/X86InstrFPStack.td | 22 +++--- lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86Subtarget.cpp | 1 + lib/Target/X86/X86Subtarget.h | 4 ++ test/CodeGen/X86/system-intrinsics-64.ll | 2 +- test/CodeGen/X86/system-intrinsics.ll | 2 +- 7 files changed, 76 insertions(+), 44 deletions(-) diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 847839e60cf..0c55d01d775 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -37,6 +37,9 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">; +def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true", + "Support fxsave/fxrestore instructions">; + def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true", "Support xsave instructions">; @@ -233,28 +236,33 @@ def : Proc<"pentium", [FeatureSlowUAMem16]>; def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"i686", [FeatureSlowUAMem16]>; def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>; -def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>; -def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1]>; +def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV, + FeatureFXSR]>; +def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, + FeatureFXSR]>; def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, - FeatureSlowBTMem]>; + FeatureFXSR, FeatureSlowBTMem]>; def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, - FeatureSlowBTMem]>; -def : Proc<"pentium4", 
[FeatureSlowUAMem16, FeatureMMX, FeatureSSE2]>; + FeatureFXSR, FeatureSlowBTMem]>; +def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, + FeatureFXSR]>; def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, - FeatureSlowBTMem]>; + FeatureFXSR, FeatureSlowBTMem]>; // Intel Core Duo. -def : ProcessorModel< - "yonah", SandyBridgeModel, - [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>; +def : ProcessorModel<"yonah", SandyBridgeModel, + [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureSlowBTMem]>; // NetBurst. def : Proc<"prescott", - [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>; + [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureSlowBTMem]>; def : Proc<"nocona", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem ]>; @@ -264,6 +272,7 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureSlowUAMem16, FeatureMMX, FeatureSSSE3, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem ]>; @@ -271,6 +280,7 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE41, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem ]>; @@ -281,6 +291,7 @@ class BonnellProc : ProcessorModel : ProcessorModel; // Legacy alias. class NehalemProc : ProcessorModel; class WestmereProc : ProcessorModel; class SandyBridgeProc : ProcessorModel; // Legacy alias. class IvyBridgeProc : ProcessorModel; // Legacy alias. class HaswellProc : ProcessorModel; // Legacy alias. 
class BroadwellProc : ProcessorModel; class KnightsLandingProc : ProcessorModel; class SkylakeProc : ProcessorModel; def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, - Feature64Bit, FeatureSlowBTMem, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, - Feature64Bit, FeatureSlowBTMem, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, - Feature64Bit, FeatureSlowBTMem, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, - Feature64Bit, FeatureSlowBTMem, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, - FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, - FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, - FeatureCMPXCHG16B, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"amdfam10", [FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"barcelona", [FeatureSSE4A, - Feature3DNowA, 
FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, + FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, + FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, + FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, + FeatureSlowBTMem, FeatureSlowSHLD]>; // Bobcat def : Proc<"btver1", [ FeatureMMX, FeatureSSSE3, FeatureSSE4A, + FeatureFXSR, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, @@ -549,6 +568,7 @@ def : Proc<"btver1", [ def : ProcessorModel<"btver2", BtVer2Model, [ FeatureMMX, FeatureAVX, + FeatureFXSR, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, @@ -574,6 +594,7 @@ def : Proc<"bdver1", [ FeaturePCLMUL, FeatureMMX, FeatureAVX, + FeatureFXSR, FeatureSSE4A, FeatureLZCNT, FeaturePOPCNT, @@ -590,6 +611,7 @@ def : Proc<"bdver2", [ FeaturePCLMUL, FeatureMMX, FeatureAVX, + FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, @@ -611,6 +633,7 @@ def : Proc<"bdver3", [ FeaturePCLMUL, FeatureMMX, FeatureAVX, + FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, @@ -628,6 +651,7 @@ def : Proc<"bdver3", [ def : Proc<"bdver4", [ FeatureMMX, FeatureAVX2, + FeatureFXSR, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, @@ -651,7 +675,7 @@ def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>; -def : Proc<"c3-2", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE1 ]>; +def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling instruction set encodings past the @@ -663,9 +687,9 @@ def : Proc<"c3-2", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE1 ]>; // covers a huge 
swath of x86 processors. If there are specific scheduling // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. -def : ProcessorModel< - "x86-64", SandyBridgeModel, - [ FeatureMMX, FeatureSSE2, Feature64Bit, FeatureSlowBTMem ]>; +def : ProcessorModel<"x86-64", SandyBridgeModel, + [FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit, + FeatureSlowBTMem ]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index c2fe39c952e..51648c6c567 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -633,16 +633,18 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>; def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>; def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>; -def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; -def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], - IIC_FXSAVE>, TB, Requires<[In64BitMode]>; -def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB; -def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], - IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; +let Predicates = [HasFXSR] in { + def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; + def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], + IIC_FXSAVE>, TB, Requires<[In64BitMode]>; + def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, 
TB; + def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], + IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; +} // Predicates = [HasFXSR] } // SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 92225e3151f..2db827ea582 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -773,6 +773,7 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; +def HasFXSR : Predicate<"Subtarget->hasFXSR()">; def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">; def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">; def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 428b715480b..ad593f4be2e 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -236,6 +236,7 @@ void X86Subtarget::initializeEnvironment() { HasPOPCNT = false; HasSSE4A = false; HasAES = false; + HasFXSR = false; HasXSAVE = false; HasXSAVEOPT = false; HasXSAVEC = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 7ba0723f03b..bca31c00398 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -89,6 +89,9 @@ protected: /// Target has AES instructions bool HasAES; + /// Target has FXSAVE/FXRSTOR instructions + bool HasFXSR; + /// Target has XSAVE instructions bool HasXSAVE; /// Target has XSAVEOPT instructions @@ -348,6 +351,7 @@ public: bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAES() const { return HasAES; } + bool hasFXSR() const { return HasFXSR; } bool hasXSAVE() const { return HasXSAVE; } bool hasXSAVEOPT() const { return 
HasXSAVEOPT; } bool hasXSAVEC() const { return HasXSAVEC; } diff --git a/test/CodeGen/X86/system-intrinsics-64.ll b/test/CodeGen/X86/system-intrinsics-64.ll index 96c44177339..e18a79c2b61 100644 --- a/test/CodeGen/X86/system-intrinsics-64.ll +++ b/test/CodeGen/X86/system-intrinsics-64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fxsr | FileCheck %s define void @test_fxsave(i8* %ptr) { ; CHECK-LABEL: test_fxsave diff --git a/test/CodeGen/X86/system-intrinsics.ll b/test/CodeGen/X86/system-intrinsics.ll index 84fcd052d7d..90dc9cd21e6 100644 --- a/test/CodeGen/X86/system-intrinsics.ll +++ b/test/CodeGen/X86/system-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fxsr | FileCheck %s define void @test_fxsave(i8* %ptr) { ; CHECK-LABEL: test_fxsave -- 2.34.1