From: Robert Khasanov Date: Tue, 9 Dec 2014 18:45:30 +0000 (+0000) Subject: [AVX512] Added lowering for VBROADCASTSS/SD instructions. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=648f7c7eb1ce92e18338bd4ba6370671e16664f7;p=oota-llvm.git [AVX512] Added lowering for VBROADCASTSS/SD instructions. Lowering patterns were written through avx512_broadcast_pat multiclass as pattern generates VBROADCAST and COPY_TO_REGCLASS nodes. Added lowering tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223804 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 2616f117731..48287f40f83 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -651,6 +651,44 @@ let ExeDomain = SSEPackedDouble in { avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; } +// SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar +// representations of source +multiclass avx512_broadcast_pat<string InstName, SDNode OpNode, X86VectorVTInfo _, + RegisterClass SrcRC_v, RegisterClass SrcRC_s> { + def : Pat<(_.VT (OpNode (!cast<ValueType>(_.EltTypeName) SrcRC_s:$src))), + (!cast<Instruction>(InstName##"r") + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + + let AddedComplexity = 30 in { + def : Pat<(_.VT (vselect _.KRCWM:$mask, + (OpNode (!cast<ValueType>(_.EltTypeName) SrcRC_s:$src)), + _.RC:$src0)), + (!cast<Instruction>(InstName##"rk") _.RC:$src0, _.KRCWM:$mask, + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + + def : Pat<(_.VT(vselect _.KRCWM:$mask, + (OpNode (!cast<ValueType>(_.EltTypeName) SrcRC_s:$src)), + _.ImmAllZerosV)), + (!cast<Instruction>(InstName##"rkz") _.KRCWM:$mask, + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + } +} + +defm : avx512_broadcast_pat<"VBROADCASTSSZ", X86VBroadcast, v16f32_info, + VR128X, FR32X>; +defm : avx512_broadcast_pat<"VBROADCASTSDZ", X86VBroadcast, v8f64_info, + VR128X, FR64X>; + +let Predicates = [HasVLX] in { + defm : avx512_broadcast_pat<"VBROADCASTSSZ256", X86VBroadcast, + v8f32x_info, VR128X, FR32X>; + defm : avx512_broadcast_pat<"VBROADCASTSSZ128", X86VBroadcast, + v4f32x_info, VR128X, FR32X>; + 
defm : avx512_broadcast_pat<"VBROADCASTSDZ256", X86VBroadcast, + v4f64x_info, VR128X, FR64X>; +} + def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSZm addr:$src)>; def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))), diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 785e5ec10bb..02195a35318 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -659,6 +659,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, + { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, + { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, // AVX-512 foldable instructions (256-bit versions) { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, @@ -670,6 +672,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 }, { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 }, { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 }, + { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, // AVX-512 foldable instructions (256-bit versions) { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, @@ -681,6 +685,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 }, { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 }, { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 }, + { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, // AES foldable instructions { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, @@ -1321,6 +1326,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VALIGNQrri, X86::VALIGNQrmi, 0 }, { X86::VALIGNDrri, X86::VALIGNDrmi, 0 }, { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, + { 
X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, + + // AVX-512{F,VL} foldable instructions + { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, // AES foldable instructions { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, @@ -1501,7 +1513,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 }, { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 }, { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 }, - { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 } + { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 }, + { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE } }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll index 0b0e0fc2bc8..5bb82338d08 100644 --- a/test/CodeGen/X86/avx512-vbroadcast.ll +++ b/test/CodeGen/X86/avx512-vbroadcast.ll @@ -20,6 +20,14 @@ define <8 x i64> @_inreg8xi64(i64 %a) { ret <8 x i64> %c } +;CHECK-LABEL: _ss16xfloat_v4 +;CHECK: vbroadcastss %xmm0, %zmm0 +;CHECK: ret +define <16 x float> @_ss16xfloat_v4(<4 x float> %a) { + %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer + ret <16 x float> %b +} + define <16 x float> @_inreg16xfloat(float %a) { ; CHECK-LABEL: _inreg16xfloat: ; CHECK: ## BB#0: @@ -30,6 +38,62 @@ define <16 x float> @_inreg16xfloat(float %a) { ret <16 x float> %c } +;CHECK-LABEL: _ss16xfloat_mask: +;CHECK: vbroadcastss %xmm0, %zmm1 {%k1} +;CHECK: ret 
+define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %b = insertelement <16 x float> undef, float %a, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i + ret <16 x float> %r +} + +;CHECK-LABEL: _ss16xfloat_maskz: +;CHECK: vbroadcastss %xmm0, %zmm0 {%k1} {z} +;CHECK: ret +define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %b = insertelement <16 x float> undef, float %a, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer + ret <16 x float> %r +} + +;CHECK-LABEL: _ss16xfloat_load: +;CHECK: vbroadcastss (%{{.*}}, %zmm +;CHECK: ret +define <16 x float> @_ss16xfloat_load(float* %a.ptr) { + %a = load float* %a.ptr + %b = insertelement <16 x float> undef, float %a, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + ret <16 x float> %c +} + +;CHECK-LABEL: _ss16xfloat_mask_load: +;CHECK: vbroadcastss (%rdi), %zmm0 {%k1} +;CHECK: ret +define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { + %a = load float* %a.ptr + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %b = insertelement <16 x float> undef, float %a, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i + ret <16 x float> %r +} + +;CHECK-LABEL: _ss16xfloat_maskz_load: +;CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z} +;CHECK: ret +define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { + %a = load float* %a.ptr + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %b = insertelement <16 x float> undef, float %a, i32 0 + %c = 
shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer + ret <16 x float> %r +} + define <8 x double> @_inreg8xdouble(double %a) { ; CHECK-LABEL: _inreg8xdouble: ; CHECK: ## BB#0: @@ -40,6 +104,62 @@ define <8 x double> @_inreg8xdouble(double %a) { ret <8 x double> %c } +;CHECK-LABEL: _sd8xdouble_mask: +;CHECK: vbroadcastsd %xmm0, %zmm1 {%k1} +;CHECK: ret +define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %b = insertelement <8 x double> undef, double %a, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i + ret <8 x double> %r +} + +;CHECK-LABEL: _sd8xdouble_maskz: +;CHECK: vbroadcastsd %xmm0, %zmm0 {%k1} {z} +;CHECK: ret +define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %b = insertelement <8 x double> undef, double %a, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer + ret <8 x double> %r +} + +;CHECK-LABEL: _sd8xdouble_load: +;CHECK: vbroadcastsd (%rdi), %zmm +;CHECK: ret +define <8 x double> @_sd8xdouble_load(double* %a.ptr) { + %a = load double* %a.ptr + %b = insertelement <8 x double> undef, double %a, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + ret <8 x double> %c +} + +;CHECK-LABEL: _sd8xdouble_mask_load: +;CHECK: vbroadcastsd (%rdi), %zmm0 {%k1} +;CHECK: ret +define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) { + %a = load double* %a.ptr + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %b = insertelement <8 x double> undef, double %a, i32 0 + %c = shufflevector <8 x double> %b, <8 x 
double> undef, <8 x i32> zeroinitializer + %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i + ret <8 x double> %r +} + +define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) { +; CHECK-LABEL: _sd8xdouble_maskz_load: +; CHECK: vbroadcastsd (%rdi), %zmm0 {%k1} {z} +; CHECK: ret + %a = load double* %a.ptr + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %b = insertelement <8 x double> undef, double %a, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer + ret <8 x double> %r +} + define <16 x i32> @_xmm16xi32(<16 x i32> %a) { ; CHECK-LABEL: _xmm16xi32: ; CHECK: ## BB#0: