X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86.td;h=852267400bba1adba4ed0c262c5130b0c7c35a37;hb=cd52a7a381a73c53ec4ef517ad87f19808cb1a28;hp=d1afa2cc70533dd65af31df3f6ac336dc14b1134;hpb=42deb12738dfc5d817d7eaf291c001a20bd1e22b;p=oota-llvm.git diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index d1afa2cc705..852267400bb 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -79,9 +79,16 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; +// FIXME: This is a 16-byte (SSE/AVX) feature; we should rename it to make that +// explicit. Also, it seems this would be the default state for most chips +// going forward, so it would probably be better to negate the logic and +// match the 32-byte "slow mem" feature below. def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", "IsUAMemFast", "true", "Fast unaligned memory access">; +def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", + "IsUAMem32Slow", "true", + "Slow unaligned 32-byte memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; @@ -125,9 +132,9 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", [FeatureFMA4]>; -def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", - "HasVectorUAMem", "true", - "Allow unaligned memory operands on vector/SIMD instructions">; +def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", + "HasSSEUnalignedMem", "true", + "Allow unaligned memory operands with SSE instructions">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions", [FeatureSSE2]>; @@ -157,17 +164,20 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true", "Enable SHA instructions", [FeatureSSE2]>; -def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", - "Support SGX instructions">; def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", + "Support MPX instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; -def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", - "HasSlowDivide", "true", - "Use small divide for positive values less than 256">; +def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", + "HasSlowDivide32", "true", + "Use 8-bit divide for positive values less than 256">; +def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw", + "HasSlowDivide64", "true", + "Use 16-bit divide for positive values less than 65536">; def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; @@ -180,6 +190,9 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; +def FeatureSoftFloat + : SubtargetFeature<"soft-float", "UseSoftFloat", "true", + "Use software floating point features.">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -224,78 +237,164 @@ def : ProcessorModel<"core2", SandyBridgeModel, def : ProcessorModel<"penryn", SandyBridgeModel, [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -// Atom. -def : ProcessorModel<"atom", AtomModel, - [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, - FeatureCallRegIndirect, - FeatureLEAUsesAG, - FeaturePadShortFunctions]>; - -// Atom Silvermont. -def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM, - FeatureSSE42, FeatureCMPXCHG16B, - FeatureMOVBE, FeaturePOPCNT, - FeaturePCLMUL, FeatureAES, - FeatureCallRegIndirect, - FeaturePRFCHW, - FeatureSlowLEA, FeatureSlowIncDec, - FeatureSlowBTMem, FeatureFastUAMem]>; +// Atom CPUs. +class BonnellProc : ProcessorModel; +def : BonnellProc<"bonnell">; +def : BonnellProc<"atom">; // Pin the generic name to the baseline. + +class SilvermontProc : ProcessorModel; +def : SilvermontProc<"silvermont">; +def : SilvermontProc<"slm">; // Legacy alias. + // "Arrandale" along with corei3 and corei5 -def : ProcessorModel<"corei7", SandyBridgeModel, - [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, - FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>; +class NehalemProc : ProcessorModel; +def : NehalemProc<"nehalem">; +def : NehalemProc<"corei7">; -def : ProcessorModel<"nehalem", SandyBridgeModel, - [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, - FeatureFastUAMem, FeaturePOPCNT]>; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : ProcessorModel<"westmere", SandyBridgeModel, - [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, - FeatureFastUAMem, FeaturePOPCNT, FeatureAES, - FeaturePCLMUL]>; -// Sandy Bridge +class WestmereProc : ProcessorModel; +def : WestmereProc<"westmere">; + // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. -def : ProcessorModel<"corei7-avx", SandyBridgeModel, - [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; -// Ivy Bridge -def : ProcessorModel<"core-avx-i", SandyBridgeModel, - [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, - FeatureF16C, FeatureFSGSBase]>; - -// Haswell -def : ProcessorModel<"core-avx2", HaswellModel, - [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, - FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, - FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, - FeatureHLE]>; - -// KNL +class SandyBridgeProc : ProcessorModel; +def : SandyBridgeProc<"sandybridge">; +def : SandyBridgeProc<"corei7-avx">; // Legacy alias. + +class IvyBridgeProc : ProcessorModel; +def : IvyBridgeProc<"ivybridge">; +def : IvyBridgeProc<"core-avx-i">; // Legacy alias. + +class HaswellProc : ProcessorModel; +def : HaswellProc<"haswell">; +def : HaswellProc<"core-avx2">; // Legacy alias. + +class BroadwellProc : ProcessorModel; +def : BroadwellProc<"broadwell">; + // FIXME: define KNL model -def : ProcessorModel<"knl", HaswellModel, +class KnightsLandingProc : ProcessorModel; + FeatureSlowIncDec, FeatureMPX]>; +def : KnightsLandingProc<"knl">; -// SKX // FIXME: define SKX model -def : ProcessorModel<"skx", HaswellModel, +class SkylakeProc : ProcessorModel; + FeatureSlowIncDec, FeatureMPX]>; +def : SkylakeProc<"skylake">; +def : SkylakeProc<"skx">; // Legacy alias. + + +// AMD CPUs. def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; @@ -304,7 +403,7 @@ def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, +def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; @@ -328,39 +427,52 @@ def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"barcelona", [FeatureSSE4A, + Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, + FeaturePOPCNT, FeatureSlowBTMem, + FeatureSlowSHLD]>; // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>; + // Jaguar -def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureAES, FeaturePCLMUL, - FeatureBMI, FeatureF16C, FeatureMOVBE, - FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>; +def : ProcessorModel<"btver2", BtVer2Model, + [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, + FeaturePRFCHW, FeatureAES, FeaturePCLMUL, + FeatureBMI, FeatureF16C, FeatureMOVBE, + FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem, + FeatureSlowSHLD]>; + +// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips. + // Bulldozer def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>; + FeatureAVX, FeatureSSE4A, FeatureLZCNT, + FeaturePOPCNT, FeatureSlowSHLD]>; // Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, FeatureTBM, - FeatureFMA, FeatureSlowSHLD]>; + FeatureAVX, FeatureSSE4A, FeatureF16C, + FeatureLZCNT, FeaturePOPCNT, FeatureBMI, + FeatureTBM, FeatureFMA, FeatureSlowSHLD]>; // Steamroller def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, FeatureTBM, - FeatureFMA, FeatureFSGSBase]>; + FeatureAVX, FeatureSSE4A, FeatureF16C, + FeatureLZCNT, FeaturePOPCNT, FeatureBMI, + FeatureTBM, FeatureFMA, FeatureSlowSHLD, + FeatureFSGSBase]>; // Excavator def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureBMI2, - FeatureTBM, FeatureFMA, FeatureFSGSBase]>; + FeatureTBM, FeatureFMA, FeatureSSE4A, + FeatureFSGSBase]>; def : Proc<"geode", [Feature3DNowA]>; @@ -373,7 +485,7 @@ def : Proc<"c3-2", [FeatureSSE1]>; // be good for modern chips without enabling instruction set encodings past the // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and // modern 64-bit x86 chip, and enables features that are generally beneficial. -// +// // We currently use the Sandy Bridge model as the default scheduling model as // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which // covers a huge swath of x86 processors. If there are specific scheduling