From: Bruno Cardoso Lopes Date: Tue, 26 Jul 2011 02:39:32 +0000 (+0000) Subject: Add 256-bit isel for movsldup/movshdup X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5d348b4dc43eef440e18d79e006963296112e8e2;p=oota-llvm.git Add 256-bit isel for movsldup/movshdup git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136051 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6e46b3f56d3..b060e557a2f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3222,32 +3222,31 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP // -multiclass sse3_replicate_sfp op, SDNode OpNode, string OpcodeStr> { -def rr : S3SI op, SDNode OpNode, string OpcodeStr, + ValueType vt, RegisterClass RC, PatFrag mem_frag, + X86MemOperand x86memop> { +def rr : S3SI; -def rm : S3SI; +def rm : S3SI; -} - -multiclass sse3_replicate_sfp_y op, SDNode OpNode, - string OpcodeStr> { -def rr : S3SI; -def rm : S3SI; + [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>; } let Predicates = [HasAVX] in { - // FIXME: Merge above classes when we have patterns for the ymm version - defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup">, VEX; - defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup">, VEX; - defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, X86Movshdup, "vmovshdup">, VEX; - defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, X86Movsldup, "vmovsldup">, VEX; -} -defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup">; -defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup">; + defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v4f32, VR128, memopv4f32, f128mem>, VEX; + defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v4f32, VR128, memopv4f32, f128mem>, VEX; + defm VMOVSHDUPY : 
sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v8f32, VR256, memopv8f32, f256mem>, VEX; + defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v8f32, VR256, memopv8f32, f256mem>, VEX; +} +defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, + memopv4f32, f128mem>; +defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, + memopv4f32, f128mem>; let Predicates = [HasSSE3] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), @@ -3269,6 +3268,14 @@ let Predicates = [HasAVX] in { (VMOVSLDUPrr VR128:$src)>; def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), (VMOVSLDUPrm addr:$src)>; + def : Pat<(v8i32 (X86Movshdup VR256:$src)), + (VMOVSHDUPYrr VR256:$src)>; + def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))), + (VMOVSHDUPYrm addr:$src)>; + def : Pat<(v8i32 (X86Movsldup VR256:$src)), + (VMOVSLDUPYrr VR256:$src)>; + def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))), + (VMOVSLDUPYrm addr:$src)>; } //===---------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/avx-256-movdup.ll b/test/CodeGen/X86/avx-256-movdup.ll new file mode 100644 index 00000000000..42d84def98a --- /dev/null +++ b/test/CodeGen/X86/avx-256-movdup.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vmovsldup +define <8 x float> @movdupA(<8 x float> %src) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> + ret <8 x float> %shuffle.i +} + +; CHECK: vmovshdup +define <8 x float> @movdupB(<8 x float> %src) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> + ret <8 x float> %shuffle.i +} + +; CHECK: vmovsldup +define <4 x i64> @movdupC(<4 x i64> %src) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x i64> %src to <8 x float> + %shuffle.i = 
shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> + %1 = bitcast <8 x float> %shuffle.i to <4 x i64> + ret <4 x i64> %1 +} + +; CHECK: vmovshdup +define <4 x i64> @movdupD(<4 x i64> %src) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x i64> %src to <8 x float> + %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> + %1 = bitcast <8 x float> %shuffle.i to <4 x i64> + ret <4 x i64> %1 +} +