From 8c4bb575e190bbae78d8cad0ed938539612a17db Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 20 Feb 2015 00:45:28 +0000 Subject: [PATCH] Revert "AVX-512: Full implementation for VRNDSCALESS/SD instructions and intrinsics." The instructions were being generated on architectures that don't support avx512. This reverts commit r229837. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229942 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 14 ++-- lib/Target/X86/X86ISelLowering.cpp | 15 +--- lib/Target/X86/X86ISelLowering.h | 3 +- lib/Target/X86/X86InstrAVX512.td | 104 ++++++++++-------------- lib/Target/X86/X86InstrFragmentsSIMD.td | 3 - lib/Target/X86/X86IntrinsicsInfo.h | 8 +- test/CodeGen/X86/avx512-intrinsics.ll | 8 -- test/CodeGen/X86/avx512-round.ll | 25 ------ 8 files changed, 54 insertions(+), 126 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 60deb3288a0..0271310f5d6 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3193,14 +3193,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; + def int_x86_avx512_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4edc1d5e807..261645c2233 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17374,20 +17374,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src2 = Op.getOperand(2); SDValue Src0 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - // There are 2 kinds of intrinsics in this group: - // (1) With supress-all-exceptions (sae) - 6 operands - // (2) With rounding mode and sae - 7 operands. - if (Op.getNumOperands() == 6) { - SDValue Sae = Op.getOperand(5); - return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, - Sae), - Mask, Src0, Subtarget, DAG); - } - assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form"); - SDValue RoundingMode = Op.getOperand(5); - SDValue Sae = Op.getOperand(6); + SDValue RoundingMode = Op.getOperand(5); return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, - RoundingMode, Sae), + RoundingMode), Mask, Src0, Subtarget, DAG); } case INTR_TYPE_2OP_MASK: { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9f94e9df875..5d69c1fa188 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -393,8 +393,7 @@ namespace llvm { FMSUB_RND, FNMSUB_RND, FMADDSUB_RND, - FMSUBADD_RND, - RNDSCALE, + FMSUBADD_RND, // Compress and expand COMPRESS, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 9d20922b5a2..6249a2cd7cc 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -101,8 +101,6 @@ class X86VectorVTInfo("v" # !srl(Size, 5) # "i32"); @@ -4654,6 +4652,7 @@ let ExeDomain = d in { } // ExeDomain } + defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, loadv16f32, SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; @@ -4673,68 +4672,51 @@ def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), FROUND_CURRENT)), (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; -multiclass -avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { +multiclass avx512_rndscale_scalar opc, string OpcodeStr, + Operand x86memop, RegisterClass RC, Domain d> { +let ExeDomain = d in { + def r : AVX512AIi8, EVEX_4V; - let ExeDomain = _.ExeDomain in { - defm r : AVX512_maskable_scalar; + def m : AVX512AIi8, EVEX_4V; +} // ExeDomain +} - defm rb : AVX512_maskable_scalar, EVEX_B; +defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X, + SSEPackedSingle>, EVEX_CD8<32, CD8VT1>; - let mayLoad = 1 in - defm m : AVX512_maskable_scalar; - } - def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>; - def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>; - def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>; - def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>; - def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>; - - def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x1))), _.FRC)>; - def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x2))), _.FRC)>; - def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x3))), _.FRC)>; - def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x4))), _.FRC)>; - def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS - (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0xc))), _.FRC)>; -} - -defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>; - -defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; +defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X, + SSEPackedDouble>, EVEX_CD8<64, CD8VT1>; + +let Predicates = [HasAVX512] in { + def : Pat<(ffloor FR32X:$src), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; + def : Pat<(f64 (ffloor FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>; + def : Pat<(f32 (fnearbyint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>; + def : Pat<(f64 (fnearbyint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>; + def : Pat<(f32 (fceil FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>; + def : Pat<(f64 (fceil FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>; + def : Pat<(f32 (frint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>; + def : Pat<(f64 (frint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>; + def : Pat<(f32 (ftrunc FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>; + def : Pat<(f64 (ftrunc FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; +} def : Pat<(v16f32 (ffloor VR512:$src)), (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 25058a25646..f8590e59dbf 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -223,8 +223,6 @@ def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisVec<0>, SDTCisInt<2>]>; def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisVec<0>, SDTCisInt<3>]>; -def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, - SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>; def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; @@ -301,7 +299,6 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>; def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; -def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index e4368116a67..d32b448c619 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -378,10 +378,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::RNDSCALE, 0), - X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB, X86ISD::FSUB_RND), X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB, @@ -400,8 +396,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), - X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), - X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), + X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0), + X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 8eb67c0a345..b6375c1f618 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -68,14 +68,6 @@ define <8 x double> @test7(<8 x double> %a) { ret <8 x double>%res } -declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32) - -define <2 x double> @test_rndsc_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { -; CHECK: vrndscalesd $11, %xmm{{.*}} {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x0b,0xd1,0x0b] - %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 5, i32 11, i32 4) - ret <2 x double>%res -} - declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) define <16 x float> @test8(<16 x float> %a) { diff --git a/test/CodeGen/X86/avx512-round.ll b/test/CodeGen/X86/avx512-round.ll index ffeb2a85e91..19d9f18b80a 100644 --- a/test/CodeGen/X86/avx512-round.ll +++ b/test/CodeGen/X86/avx512-round.ll @@ -79,28 +79,3 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %a) { ret <8 x double> %res } declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) - -define double @nearbyint_f64(double %a) { -; CHECK-LABEL: nearbyint_f64 -; CHECK: vrndscalesd $12, {{.*}}encoding: [0x62,0xf3,0xfd,0x08,0x0b,0xc0,0x0c] - %res = call double @llvm.nearbyint.f64(double %a) - ret double %res -} -declare double @llvm.nearbyint.f64(double %p) - -define float @floor_f32(float %a) { -; CHECK-LABEL: floor_f32 -; CHECK: vrndscaless $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x01] - %res = call float @llvm.floor.f32(float %a) - ret float %res -} -declare float @llvm.floor.f32(float %p) - -define float @floor_f32m(float* %aptr) { -; CHECK-LABEL: floor_f32m -; CHECK: vrndscaless $1, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x01] - %a = load float* %aptr, align 4 - %res = call float @llvm.floor.f32(float %a) - ret float %res -} - -- 2.34.1