setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+
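+ // Marking the f32->f64 extending vector load Legal lets the DAG combiner
+ // fold (fpext (load <2 x float>)) into a single extending load, which the
+ // cvtps2pd/vcvtps2pd patterns in X86InstrSSE.td then select.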
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
if (Subtarget->hasAVX()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
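+ // Likewise for the 256-bit case: a <4 x float> -> <4 x double> extending
+ // load can be selected as a single 256-bit vcvtps2pd.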
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
+
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+// 128-/256-bit extload pattern fragments
+def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
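+// extloadvf32 is the generic fragment from TargetSelectionDAG.td matching an
+// extending load whose in-memory scalar element type is f32; the fragments
+// above additionally fix the result type to v2f64/v4f64.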
+
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
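+// Giving the memory form a load pattern lets TableGen infer mayLoad, so the
+// explicit neverHasSideEffects/mayLoad flags are no longer needed.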
-let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
-let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB;
}
// Convert Packed DW Integers to Packed Double FP
(VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
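+ // With EXTLOAD marked Legal, (fpext (load)) reaches instruction selection
+ // as an extending load, so match the extload fragment directly.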
- def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDYrm addr:$src)>;
}
; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s
; PR11674
define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
entry:
-; TODO: We should be able to generate cvtps2pd for the load.
-; For now, just check that we generate something sane.
-; CHECK: cvtss2sd
-; CHECK: cvtss2sd
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
%0 = load <2 x float>* %in, align 8
%1 = fpext <2 x float> %0 to <2 x double>
store <2 x double> %1, <2 x double>* %out, align 1
ret void
}
+
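+; fpext of a <4 x float> load: without AVX this splits into two 128-bit
+; cvtps2pd loads; with AVX a single 256-bit vcvtps2pd suffices.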
+define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) {
+entry:
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
+ %0 = load <4 x float>* %in
+ %1 = fpext <4 x float> %0 to <4 x double>
+ store <4 x double> %1, <4 x double>* %out, align 1
+ ret void
+}
+
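+; fpext of an <8 x float> load: four 128-bit cvtps2pd without AVX, or two
+; 256-bit vcvtps2pd with AVX.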
+define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
+entry:
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
+; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
+ %0 = load <8 x float>* %in
+ %1 = fpext <8 x float> %0 to <8 x double>
+ store <8 x double> %1, <8 x double>* %out, align 1
+ ret void
+}