From: Michael Liao Date: Mon, 10 Sep 2012 18:33:51 +0000 (+0000) Subject: Enhance PR11334 fix to support extload from v2f32/v4f32 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b8150d852399f61f3fe7f6fd66edb99173de884c;p=oota-llvm.git Enhance PR11334 fix to support extload from v2f32/v4f32 - Fix a remaining issue of PR11674 as well git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163528 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b0dc5b7b687..21791d5be9b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -932,6 +932,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); } if (Subtarget->hasSSE41()) { @@ -1043,6 +1045,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal); + setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 32e4315fbd4..90354354367 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -240,6 +240,10 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; +// 128-/256-bit extload pattern fragments +def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; +def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>; + // Like 'store', but always requires 128-bit vector 
alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3f0e9fdd1e5..17e91a6efb0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2007,10 +2007,10 @@ def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; -let neverHasSideEffects = 1, mayLoad = 1 in def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>, TB, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, @@ -2028,10 +2028,10 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB; -let neverHasSideEffects = 1, mayLoad = 1 in def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>, TB; + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB; } // Convert Packed DW Integers to Packed Double FP @@ -2134,7 +2134,7 @@ let Predicates = [HasAVX] in { (VCVTPS2PDrr VR128:$src)>; def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), (VCVTPS2PDYrr VR128:$src)>; - def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + def : Pat<(v4f64 (extloadv4f32 addr:$src)), (VCVTPS2PDYrm addr:$src)>; } diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll index 05b263e2e0c..dc0464ff9e0 100644 --- a/test/CodeGen/X86/vec_fpext.ll +++ 
b/test/CodeGen/X86/vec_fpext.ll @@ -1,14 +1,38 @@ ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s ; PR11674 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) { entry: -; TODO: We should be able to generate cvtps2pd for the load. -; For now, just check that we generate something sane. -; CHECK: cvtss2sd -; CHECK: cvtss2sd +; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}} +; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}} %0 = load <2 x float>* %in, align 8 %1 = fpext <2 x float> %0 to <2 x double> store <2 x double> %1, <2 x double>* %out, align 1 ret void } + +define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) { +entry: +; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}} +; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}} + %0 = load <4 x float>* %in + %1 = fpext <4 x float> %0 to <4 x double> + store <4 x double> %1, <4 x double>* %out, align 1 + ret void +} + +define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) { +entry: +; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}} +; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}} +; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}} + %0 = load <8 x float>* %in + %1 = fpext <8 x float> %0 to <8 x double> + store <8 x double> %1, <8 x double>* %out, align 1 + ret void +}