From f060668270f8538693be0828c0cf262dff6b50a9 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 28 Apr 2015 18:48:45 +0000 Subject: [PATCH] [x86] remove RCPPS and RSQRTPS intrinsic instruction definitions We don't need codegen-only intrinsic instructions for the vector forms of these instructions. This makes the reciprocal estimate instruction lowering identical to how we handle normal square roots: (V)SQRTPS / (V)SQRTPD. No existing regression tests fail with this patch. Differential Revision: http://reviews.llvm.org/D9301 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236013 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 6 ---- lib/Target/X86/X86InstrSSE.td | 48 ++---------------------------- lib/Target/X86/X86IntrinsicsInfo.h | 4 +++ 3 files changed, 6 insertions(+), 52 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5beff9ec191..1f4a9e9a45a 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -526,11 +526,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 }, { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 }, - { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 }, { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 }, { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 }, { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 }, - { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 }, { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, @@ -634,11 +632,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 }, { X86::VPTESTrr, X86::VPTESTrm, 0 }, { X86::VRCPPSr, X86::VRCPPSm, 0 }, - { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 }, { X86::VROUNDPDr, X86::VROUNDPDm, 0 }, { X86::VROUNDPSr, X86::VROUNDPSm, 0 }, { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, - { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 }, { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, { X86::VTESTPDrr, X86::VTESTPDrm, 0 }, @@ -667,11 +663,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, { X86::VPTESTYrr, X86::VPTESTYrm, 0 }, { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, - { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 }, { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 }, { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 }, { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, - { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, 0 }, { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2b418aada85..35738476bba 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3464,46 +3464,6 @@ let Predicates = [HasAVX] in { Sched<[itins.Sched.Folded]>; } -/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. -multiclass sse1_fp_unop_p_int opc, string OpcodeStr, - Intrinsic V4F32Int, Intrinsic V8F32Int, - OpndItins itins> { -let isCodeGenOnly = 1 in { -let Predicates = [HasAVX] in { - def V#NAME#PSr_Int : PSI, VEX, Sched<[itins.Sched]>; - def V#NAME#PSm_Int : PSI, VEX, Sched<[itins.Sched.Folded]>; - def V#NAME#PSYr_Int : PSI, VEX, VEX_L, Sched<[itins.Sched]>; - def V#NAME#PSYm_Int : PSI, VEX, VEX_L, Sched<[itins.Sched.Folded]>; -} - - def PSr_Int : PSI, Sched<[itins.Sched]>; - def PSm_Int : PSI, Sched<[itins.Sched.Folded]>; -} // isCodeGenOnly = 1 -} - /// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { @@ -3574,13 +3534,9 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>, // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>, - sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>; + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>; defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, - sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, - int_x86_avx_rcp_ps_256, SSE_RCPP>; + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>; // There is no f64 version of the reciprocal approximation instructions. diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 2bea4a107fd..6944e560210 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -506,6 +506,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_max_ps_256, INTR_TYPE_2OP, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx_min_pd_256, INTR_TYPE_2OP, X86ISD::FMIN, 0), X86_INTRINSIC_DATA(avx_min_ps_256, INTR_TYPE_2OP, X86ISD::FMIN, 0), + X86_INTRINSIC_DATA(avx_rcp_ps_256, INTR_TYPE_1OP, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx_rsqrt_ps_256, INTR_TYPE_1OP, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx_sqrt_pd_256, INTR_TYPE_1OP, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx_sqrt_ps_256, INTR_TYPE_1OP, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), @@ -664,6 +666,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE), X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0), + X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(sse_sqrt_ps, INTR_TYPE_1OP, ISD::FSQRT, 0), X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse_ucomige_ss, COMI, X86ISD::UCOMI, ISD::SETGE), -- 2.34.1