Move the MMX subtarget feature out of the SSE set of features and into
author Eric Christopher <echristo@gmail.com>
Thu, 8 Oct 2015 20:10:06 +0000 (20:10 +0000)
committer Eric Christopher <echristo@gmail.com>
Thu, 8 Oct 2015 20:10:06 +0000 (20:10 +0000)
its own variable.

This is needed so that we can explicitly turn off MMX without turning
off SSE and also so that we can diagnose feature set incompatibilities
that involve MMX without SSE.

Rationale:

// sse3
__m128d test_mm_addsub_pd(__m128d A, __m128d B) {
  return _mm_addsub_pd(A, B);
}

// mmx
void shift(__m64 a, __m64 b, int c) {
  _mm_slli_pi16(a, c);
  _mm_slli_pi32(a, c);
  _mm_slli_si64(a, c);
  _mm_srli_pi16(a, c);
  _mm_srli_pi32(a, c);
  _mm_srli_si64(a, c);
  _mm_srai_pi16(a, c);
  _mm_srai_pi32(a, c);
}

clang -msse3 -mno-mmx file.c -c

For this code we should be able to explicitly turn off MMX
without affecting the compilation of the SSE3 function, and then
diagnose and emit an error when compiling the MMX function.

This matches the existing gcc behavior and follows the spirit of
the SSE/MMX separation in llvm where we can (and do) turn off
MMX code generation except in the presence of intrinsics.

Updated a couple of tests, but primarily tested with a couple of tests
for turning on only mmx and only sse.

This is paired with a patch to clang to take advantage of this behavior.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249731 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86.td
lib/Target/X86/X86Subtarget.cpp
lib/Target/X86/X86Subtarget.h
test/CodeGen/X86/mmx-intrinsics.ll
test/CodeGen/X86/mmx-only.ll [new file with mode: 0644]
test/CodeGen/X86/mult-alt-x86.ll
test/CodeGen/X86/sse-only.ll [new file with mode: 0644]

index 3a3b03874c0f8006320f710744aeca66217e4894..fa0b674ff9486b3b5ac21f8c4024b0f559962a63 100644 (file)
@@ -37,14 +37,17 @@ def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                        "Support POPCNT instruction">;
 
-
-def FeatureMMX     : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+// The MMX subtarget feature is separate from the rest of the SSE features
+// because it's important (for odd compatibility reasons) to be able to
+// turn it off explicitly while allowing SSE+ to be on.
+def FeatureMMX     : SubtargetFeature<"mmx","HasMMX", "true",
                                       "Enable MMX instructions">;
+
 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                                       "Enable SSE instructions",
                                       // SSE codegen depends on cmovs, and all
                                       // SSE1+ processors support them.
-                                      [FeatureMMX, FeatureCMOV]>;
+                                      [FeatureCMOV]>;
 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                       "Enable SSE2 instructions",
                                       [FeatureSSE1]>;
@@ -219,184 +222,241 @@ def : Proc<"pentium-mmx",     [FeatureSlowUAMem16, FeatureMMX]>;
 def : Proc<"i686",            [FeatureSlowUAMem16]>;
 def : Proc<"pentiumpro",      [FeatureSlowUAMem16, FeatureCMOV]>;
 def : Proc<"pentium2",        [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>;
-def : Proc<"pentium3",        [FeatureSlowUAMem16, FeatureSSE1]>;
-def : Proc<"pentium3m",       [FeatureSlowUAMem16, FeatureSSE1,
+def : Proc<"pentium3",        [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1]>;
+def : Proc<"pentium3m",       [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
                                FeatureSlowBTMem]>;
-def : Proc<"pentium-m",       [FeatureSlowUAMem16, FeatureSSE2,
+def : Proc<"pentium-m",       [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
                                FeatureSlowBTMem]>;
-def : Proc<"pentium4",        [FeatureSlowUAMem16, FeatureSSE2]>;
-def : Proc<"pentium4m",       [FeatureSlowUAMem16, FeatureSSE2,
+def : Proc<"pentium4",        [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2]>;
+def : Proc<"pentium4m",       [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
                                FeatureSlowBTMem]>;
 
 // Intel Core Duo.
-def : ProcessorModel<"yonah", SandyBridgeModel,
-                     [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
+def : ProcessorModel<
+          "yonah", SandyBridgeModel,
+          [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;
 
 // NetBurst.
-def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona",   [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B,
-                        FeatureSlowBTMem]>;
+def : Proc<"prescott",
+           [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;
+def : Proc<"nocona", [
+  FeatureSlowUAMem16,
+  FeatureMMX,
+  FeatureSSE3,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem
+]>;
 
 // Intel Core 2 Solo/Duo.
-def : ProcessorModel<"core2", SandyBridgeModel,
-                     [FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B,
-                      FeatureSlowBTMem]>;
-def : ProcessorModel<"penryn", SandyBridgeModel,
-                     [FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B,
-                      FeatureSlowBTMem]>;
+def : ProcessorModel<"core2", SandyBridgeModel, [
+  FeatureSlowUAMem16,
+  FeatureMMX,
+  FeatureSSSE3,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem
+]>;
+def : ProcessorModel<"penryn", SandyBridgeModel, [
+  FeatureSlowUAMem16,
+  FeatureMMX,
+  FeatureSSE41,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem
+]>;
 
 // Atom CPUs.
 class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
-                                   ProcIntelAtom,
-                                   FeatureSlowUAMem16,
-                                   FeatureSSSE3,
-                                   FeatureCMPXCHG16B,
-                                   FeatureMOVBE,
-                                   FeatureSlowBTMem,
-                                   FeatureLeaForSP,
-                                   FeatureSlowDivide32,
-                                   FeatureSlowDivide64,
-                                   FeatureCallRegIndirect,
-                                   FeatureLEAUsesAG,
-                                   FeaturePadShortFunctions
-                                 ]>;
+  ProcIntelAtom,
+  FeatureSlowUAMem16,
+  FeatureMMX,
+  FeatureSSSE3,
+  FeatureCMPXCHG16B,
+  FeatureMOVBE,
+  FeatureSlowBTMem,
+  FeatureLeaForSP,
+  FeatureSlowDivide32,
+  FeatureSlowDivide64,
+  FeatureCallRegIndirect,
+  FeatureLEAUsesAG,
+  FeaturePadShortFunctions
+]>;
 def : BonnellProc<"bonnell">;
 def : BonnellProc<"atom">; // Pin the generic name to the baseline.
 
 class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
-                                      ProcIntelSLM,
-                                      FeatureSSE42,
-                                      FeatureCMPXCHG16B,
-                                      FeatureMOVBE,
-                                      FeaturePOPCNT,
-                                      FeaturePCLMUL,
-                                      FeatureAES,
-                                      FeatureSlowDivide64,
-                                      FeatureCallRegIndirect,
-                                      FeaturePRFCHW,
-                                      FeatureSlowLEA,
-                                      FeatureSlowIncDec,
-                                      FeatureSlowBTMem
-                                    ]>;
+  ProcIntelSLM,
+  FeatureMMX,
+  FeatureSSE42,
+  FeatureCMPXCHG16B,
+  FeatureMOVBE,
+  FeaturePOPCNT,
+  FeaturePCLMUL,
+  FeatureAES,
+  FeatureSlowDivide64,
+  FeatureCallRegIndirect,
+  FeaturePRFCHW,
+  FeatureSlowLEA,
+  FeatureSlowIncDec,
+  FeatureSlowBTMem
+]>;
 def : SilvermontProc<"silvermont">;
 def : SilvermontProc<"slm">; // Legacy alias.
 
 // "Arrandale" along with corei3 and corei5
 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
-                                   FeatureSSE42,
-                                   FeatureCMPXCHG16B,
-                                   FeatureSlowBTMem,
-                                   FeaturePOPCNT
-                                 ]>;
+  FeatureMMX,
+  FeatureSSE42,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem,
+  FeaturePOPCNT
+]>;
 def : NehalemProc<"nehalem">;
 def : NehalemProc<"corei7">;
 
 // Westmere is a similar machine to nehalem with some additional features.
 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
 class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
-                                    FeatureSSE42,
-                                    FeatureCMPXCHG16B,
-                                    FeatureSlowBTMem,
-                                    FeaturePOPCNT,
-                                    FeatureAES,
-                                    FeaturePCLMUL
-                                  ]>;
+  FeatureMMX,
+  FeatureSSE42,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem,
+  FeaturePOPCNT,
+  FeatureAES,
+  FeaturePCLMUL
+]>;
 def : WestmereProc<"westmere">;
 
 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 // rather than a superset.
 class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
-                                       FeatureAVX,
-                                       FeatureCMPXCHG16B,
-                                       FeatureSlowBTMem,
-                                       FeatureSlowUAMem32,
-                                       FeaturePOPCNT,
-                                       FeatureAES,
-                                       FeaturePCLMUL
-                                     ]>;
+  FeatureMMX,
+  FeatureAVX,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem,
+  FeatureSlowUAMem32,
+  FeaturePOPCNT,
+  FeatureAES,
+  FeaturePCLMUL
+]>;
 def : SandyBridgeProc<"sandybridge">;
 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
 
 class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
-                                     FeatureAVX,
-                                     FeatureCMPXCHG16B,
-                                     FeatureSlowBTMem,
-                                     FeatureSlowUAMem32,
-                                     FeaturePOPCNT,
-                                     FeatureAES,
-                                     FeaturePCLMUL,
-                                     FeatureRDRAND,
-                                     FeatureF16C,
-                                     FeatureFSGSBase
-                                   ]>;
+  FeatureMMX,
+  FeatureAVX,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem,
+  FeatureSlowUAMem32,
+  FeaturePOPCNT,
+  FeatureAES,
+  FeaturePCLMUL,
+  FeatureRDRAND,
+  FeatureF16C,
+  FeatureFSGSBase
+]>;
 def : IvyBridgeProc<"ivybridge">;
 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
 
 class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
-                                   FeatureAVX2,
-                                   FeatureCMPXCHG16B,
-                                   FeatureSlowBTMem,
-                                   FeaturePOPCNT,
-                                   FeatureAES,
-                                   FeaturePCLMUL,
-                                   FeatureRDRAND,
-                                   FeatureF16C,
-                                   FeatureFSGSBase,
-                                   FeatureMOVBE,
-                                   FeatureLZCNT,
-                                   FeatureBMI,
-                                   FeatureBMI2,
-                                   FeatureFMA,
-                                   FeatureRTM,
-                                   FeatureHLE,
-                                   FeatureSlowIncDec
-                                 ]>;
+  FeatureMMX,
+  FeatureAVX2,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem,
+  FeaturePOPCNT,
+  FeatureAES,
+  FeaturePCLMUL,
+  FeatureRDRAND,
+  FeatureF16C,
+  FeatureFSGSBase,
+  FeatureMOVBE,
+  FeatureLZCNT,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureFMA,
+  FeatureRTM,
+  FeatureHLE,
+  FeatureSlowIncDec
+]>;
 def : HaswellProc<"haswell">;
 def : HaswellProc<"core-avx2">; // Legacy alias.
 
 class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
-                                     FeatureAVX2,
-                                     FeatureCMPXCHG16B,
-                                     FeatureSlowBTMem,
-                                     FeaturePOPCNT,
-                                     FeatureAES,
-                                     FeaturePCLMUL,
-                                     FeatureRDRAND,
-                                     FeatureF16C,
-                                     FeatureFSGSBase,
-                                     FeatureMOVBE,
-                                     FeatureLZCNT,
-                                     FeatureBMI,
-                                     FeatureBMI2,
-                                     FeatureFMA,
-                                     FeatureRTM,
-                                     FeatureHLE,
-                                     FeatureADX,
-                                     FeatureRDSEED,
-                                     FeatureSlowIncDec
-                                   ]>;
+  FeatureMMX,
+  FeatureAVX2,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem,
+  FeaturePOPCNT,
+  FeatureAES,
+  FeaturePCLMUL,
+  FeatureRDRAND,
+  FeatureF16C,
+  FeatureFSGSBase,
+  FeatureMOVBE,
+  FeatureLZCNT,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureFMA,
+  FeatureRTM,
+  FeatureHLE,
+  FeatureADX,
+  FeatureRDSEED,
+  FeatureSlowIncDec
+]>;
 def : BroadwellProc<"broadwell">;
 
 // FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
-                     [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
-                      FeatureCMPXCHG16B, FeaturePOPCNT,
-                      FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
-                      FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
-                      FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                      FeatureSlowIncDec, FeatureMPX]>;
+class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
+  FeatureMMX,
+  FeatureAVX512,
+  FeatureERI,
+  FeatureCDI,
+  FeaturePFI,
+  FeatureCMPXCHG16B,
+  FeaturePOPCNT,
+  FeatureAES,
+  FeaturePCLMUL,
+  FeatureRDRAND,
+  FeatureF16C,
+  FeatureFSGSBase,
+  FeatureMOVBE,
+  FeatureLZCNT,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureFMA,
+  FeatureRTM,
+  FeatureHLE,
+  FeatureSlowIncDec,
+  FeatureMPX
+]>;
 def : KnightsLandingProc<"knl">;
 
 // FIXME: define SKX model
-class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
-                     [FeatureAVX512, FeatureCDI,
-                      FeatureDQI, FeatureBWI, FeatureVLX,
-                      FeatureCMPXCHG16B, FeatureSlowBTMem,
-                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
-                      FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
-                      FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
-                      FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSlowIncDec,
-                      FeatureMPX]>;
+class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
+  FeatureMMX,
+  FeatureAVX512,
+  FeatureCDI,
+  FeatureDQI,
+  FeatureBWI,
+  FeatureVLX,
+  FeatureCMPXCHG16B,
+  FeatureSlowBTMem,
+  FeaturePOPCNT,
+  FeatureAES,
+  FeaturePCLMUL,
+  FeatureRDRAND,
+  FeatureF16C,
+  FeatureFSGSBase,
+  FeatureMOVBE,
+  FeatureLZCNT,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureFMA,
+  FeatureRTM,
+  FeatureHLE,
+  FeatureADX,
+  FeatureRDSEED,
+  FeatureSlowIncDec,
+  FeatureMPX
+]>;
 def : SkylakeProc<"skylake">;
 def : SkylakeProc<"skx">; // Legacy alias.
 
@@ -447,52 +507,117 @@ def : Proc<"barcelona",       [FeatureSSE4A,
                                FeatureSlowSHLD]>;
 
 // Bobcat
-def : Proc<"btver1",          [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
-                               FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
-                               FeatureSlowSHLD]>;
+def : Proc<"btver1", [
+  FeatureMMX,
+  FeatureSSSE3,
+  FeatureSSE4A,
+  FeatureCMPXCHG16B,
+  FeaturePRFCHW,
+  FeatureLZCNT,
+  FeaturePOPCNT,
+  FeatureSlowSHLD
+]>;
 
 // Jaguar
-def : ProcessorModel<"btver2", BtVer2Model,
-                     [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
-                      FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
-                      FeatureBMI, FeatureF16C, FeatureMOVBE,
-                      FeatureLZCNT, FeaturePOPCNT,
-                      FeatureSlowSHLD]>;
+def : ProcessorModel<"btver2", BtVer2Model, [
+  FeatureMMX,
+  FeatureAVX,
+  FeatureSSE4A,
+  FeatureCMPXCHG16B,
+  FeaturePRFCHW,
+  FeatureAES,
+  FeaturePCLMUL,
+  FeatureBMI,
+  FeatureF16C,
+  FeatureMOVBE,
+  FeatureLZCNT,
+  FeaturePOPCNT,
+  FeatureSlowSHLD
+]>;
 
 // Bulldozer
-def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
-                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
-                               FeatureAVX, FeatureSSE4A, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureSlowSHLD]>;
+def : Proc<"bdver1", [
+  FeatureXOP,
+  FeatureFMA4,
+  FeatureCMPXCHG16B,
+  FeatureAES,
+  FeaturePRFCHW,
+  FeaturePCLMUL,
+  FeatureMMX,
+  FeatureAVX,
+  FeatureSSE4A,
+  FeatureLZCNT,
+  FeaturePOPCNT,
+  FeatureSlowSHLD
+]>;
 // Piledriver
-def : Proc<"bdver2",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
-                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
-                               FeatureAVX, FeatureSSE4A, FeatureF16C,
-                               FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
-                               FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
+def : Proc<"bdver2", [
+  FeatureXOP,
+  FeatureFMA4,
+  FeatureCMPXCHG16B,
+  FeatureAES,
+  FeaturePRFCHW,
+  FeaturePCLMUL,
+  FeatureMMX,
+  FeatureAVX,
+  FeatureSSE4A,
+  FeatureF16C,
+  FeatureLZCNT,
+  FeaturePOPCNT,
+  FeatureBMI,
+  FeatureTBM,
+  FeatureFMA,
+  FeatureSlowSHLD
+]>;
 
 // Steamroller
-def : Proc<"bdver3",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
-                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
-                               FeatureAVX, FeatureSSE4A, FeatureF16C,
-                               FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
-                               FeatureTBM, FeatureFMA, FeatureSlowSHLD,
-                               FeatureFSGSBase]>;
+def : Proc<"bdver3", [
+  FeatureXOP,
+  FeatureFMA4,
+  FeatureCMPXCHG16B,
+  FeatureAES,
+  FeaturePRFCHW,
+  FeaturePCLMUL,
+  FeatureMMX,
+  FeatureAVX,
+  FeatureSSE4A,
+  FeatureF16C,
+  FeatureLZCNT,
+  FeaturePOPCNT,
+  FeatureBMI,
+  FeatureTBM,
+  FeatureFMA,
+  FeatureSlowSHLD,
+  FeatureFSGSBase
+]>;
 
 // Excavator
-def : Proc<"bdver4",          [FeatureAVX2, FeatureXOP, FeatureFMA4,
-                               FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
-                               FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureBMI, FeatureBMI2,
-                               FeatureTBM, FeatureFMA, FeatureSSE4A,
-                               FeatureFSGSBase]>;
+def : Proc<"bdver4", [
+  FeatureMMX,
+  FeatureAVX2,
+  FeatureXOP,
+  FeatureFMA4,
+  FeatureCMPXCHG16B,
+  FeatureAES,
+  FeaturePRFCHW,
+  FeaturePCLMUL,
+  FeatureF16C,
+  FeatureLZCNT,
+  FeaturePOPCNT,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureTBM,
+  FeatureFMA,
+  FeatureSSE4A,
+  FeatureFSGSBase
+]>;
 
 def : Proc<"geode",           [FeatureSlowUAMem16, Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureSlowUAMem16, FeatureMMX]>;
 def : Proc<"winchip2",        [FeatureSlowUAMem16, Feature3DNow]>;
 def : Proc<"c3",              [FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3-2",            [FeatureSlowUAMem16, FeatureSSE1]>;
+def : Proc<"c3-2", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE1 ]>;
 
 // We also provide a generic 64-bit specific x86 processor model which tries to
 // be good for modern chips without enabling instruction set encodings past the
@@ -504,8 +629,9 @@ def : Proc<"c3-2",            [FeatureSlowUAMem16, FeatureSSE1]>;
 // covers a huge swath of x86 processors. If there are specific scheduling
 // knobs which need to be tuned differently for AMD chips, we might consider
 // forming a common base for them.
-def : ProcessorModel<"x86-64", SandyBridgeModel,
-                     [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : ProcessorModel<
+          "x86-64", SandyBridgeModel,
+          [ FeatureMMX, FeatureSSE2, Feature64Bit, FeatureSlowBTMem ]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
index 5b53ca93399fb58c42bcb1aba522e64d2bcdab94..a4db4e6028006d2f53116803baffcde86484893f 100644 (file)
@@ -228,9 +228,10 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
 }
 
 void X86Subtarget::initializeEnvironment() {
-  X86SSELevel = NoMMXSSE;
+  X86SSELevel = NoSSE;
   X863DNowLevel = NoThreeDNow;
   HasCMov = false;
+  HasMMX = false;
   HasX86_64 = false;
   HasPOPCNT = false;
   HasSSE4A = false;
index 52e68c08180f353b74b73c4ea2f20042b6d2c0b8..198e7fbf135d72b64893cdf75ceb59bac372e138 100644 (file)
@@ -47,7 +47,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
 
 protected:
   enum X86SSEEnum {
-    NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
+    NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
   };
 
   enum X863DNowEnum {
@@ -64,7 +64,7 @@ protected:
   /// Which PIC style to use
   PICStyles::Style PICStyle;
 
-  /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
+  /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
   X86SSEEnum X86SSELevel;
 
   /// 3DNow, 3DNow Athlon, or none supported.
@@ -74,6 +74,9 @@ protected:
   /// (generally pentium pro+).
   bool HasCMov;
 
+  /// True if this processor supports MMX instructions.
+  bool HasMMX;
+
   /// True if the processor supports X86-64 instructions.
   bool HasX86_64;
 
@@ -319,7 +322,7 @@ public:
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 
   bool hasCMov() const { return HasCMov; }
-  bool hasMMX() const { return X86SSELevel >= MMX; }
+  bool hasMMX() const { return HasMMX; }
   bool hasSSE1() const { return X86SSELevel >= SSE1; }
   bool hasSSE2() const { return X86SSELevel >= SSE2; }
   bool hasSSE3() const { return X86SSELevel >= SSE3; }
index d9bcdc4effd77fc08059f86a2cdabd106e5e99ec..7647fccb5803baa4c8e4a46350ec512001ff3d75 100644 (file)
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
-; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
+; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
 ; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
 
 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
 
diff --git a/test/CodeGen/X86/mmx-only.ll b/test/CodeGen/X86/mmx-only.ll
new file mode 100644 (file)
index 0000000..35598d5
--- /dev/null
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,-sse | FileCheck %s
+
+; Test that turning off sse doesn't turn off mmx.
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
+; CHECK-LABEL: @test88
+; CHECK: pcmpgtd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
index 5174f85adb9f5973d1ed6e428c891caf311226ef..1c83fedad3cea14be566b252043b8b665da63e87 100644 (file)
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -no-integrated-as
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 -no-integrated-as
 ; ModuleID = 'mult-alt-x86.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 target triple = "i686-pc-win32"
diff --git a/test/CodeGen/X86/sse-only.ll b/test/CodeGen/X86/sse-only.ll
new file mode 100644 (file)
index 0000000..a4fdf96
--- /dev/null
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-mmx | FileCheck %s
+
+; Test that turning off mmx doesn't turn off sse
+
+define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
+; CHECK-LABEL: test1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movapd (%ecx), %xmm0
+; CHECK-NEXT:    movlpd {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    movapd %xmm0, (%eax)
+; CHECK-NEXT:    retl
+       %tmp3 = load <2 x double>, <2 x double>* %A, align 16
+       %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+       %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
+       store <2 x double> %tmp9, <2 x double>* %r, align 16
+       ret void
+}