From 98e0b9c86d1385d9cf04aef92651f0cdf8a46a9e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Nov 2011 07:35:53 +0000 Subject: [PATCH] Add new X86 AVX2 VBROADCAST instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143612 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsX86.td | 13 +++++++-- lib/Target/X86/X86InstrSSE.td | 39 ++++++++++++++++--------- test/CodeGen/X86/avx-intrinsics-x86.ll | 12 ++++---- test/CodeGen/X86/avx2-intrinsics-x86.ll | 23 +++++++++++++++ 4 files changed, 66 insertions(+), 21 deletions(-) diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 04a119f055f..09f958c1319 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1281,13 +1281,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_vbroadcastss : + def int_x86_avx_vbroadcast_ss : GCCBuiltin<"__builtin_ia32_vbroadcastss">, Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_avx_vbroadcast_sd_256 : GCCBuiltin<"__builtin_ia32_vbroadcastsd256">, Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; - def int_x86_avx_vbroadcastss_256 : + def int_x86_avx_vbroadcast_ss_256 : GCCBuiltin<"__builtin_ia32_vbroadcastss256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_avx_vbroadcastf128_pd_256 : @@ -1672,6 +1672,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_vbroadcast_ss_ps : + GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrReadMem]>; + def int_x86_avx2_vbroadcast_sd_pd_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrReadMem]>; + def int_x86_avx2_vbroadcast_ss_ps_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrReadMem]>; def int_x86_avx2_vbroadcasti128 : GCCBuiltin<"__builtin_ia32_vbroadcastsi256">, Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 77a9031090d..8f7e27bab57 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7083,35 +7083,48 @@ class avx_broadcast opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (Int addr:$src))]>, VEX; -def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, - int_x86_avx_vbroadcastss>; -def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, - int_x86_avx_vbroadcastss_256>; -def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, - int_x86_avx_vbroadcast_sd_256>; +class avx_broadcast_reg opc, string OpcodeStr, RegisterClass RC, + Intrinsic Int> : + AVX8I, VEX; + +def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, + int_x86_avx_vbroadcast_ss>; +def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, + int_x86_avx_vbroadcast_ss_256>; +def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, + int_x86_avx_vbroadcast_sd_256>; def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; -let Predicates = [HasAVX2] in +let Predicates = [HasAVX2] in { def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, int_x86_avx2_vbroadcasti128>; +def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128, + int_x86_avx2_vbroadcast_ss_ps>; +def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256, + int_x86_avx2_vbroadcast_ss_ps_256>; +def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256, + int_x86_avx2_vbroadcast_sd_pd_256>; +} def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSY addr:$src)>; + (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), - (VBROADCASTSD addr:$src)>; + (VBROADCASTSDrm addr:$src)>; def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSY addr:$src)>; + (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSD addr:$src)>; + (VBROADCASTSDrm addr:$src)>; def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSS addr:$src)>; + (VBROADCASTSSrm addr:$src)>; def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSS addr:$src)>; + (VBROADCASTSSrm addr:$src)>; //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 2aad672d2c6..276209ea756 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -2292,20 +2292,20 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly -define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) { +define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) { ; CHECK: vbroadcastss - %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1] + %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } -declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly +declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly -define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) { +define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) { ; CHECK: vbroadcastss - %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1] + %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1] ret <8 x float> %res } -declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly +declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) { diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 8aef255cae5..81ffdea8271 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -743,3 +743,26 @@ define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) { ret <4 x i64> %res } declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly + +define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) { + ; CHECK: vbroadcastsd + %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly + + +define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) { + ; CHECK: vbroadcastss + %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly + + +define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) { + ; CHECK: vbroadcastss + %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly -- 2.34.1