From c375284b82054a4908a2a8dabd1399b134faf69d Mon Sep 17 00:00:00 2001 From: Asaf Badouh Date: Sun, 18 Oct 2015 11:04:38 +0000 Subject: [PATCH] [X86][AVX512DQ] add scalar fpclass Differential Revision: http://reviews.llvm.org/D13769 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250650 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 8 ++ lib/Target/X86/X86ISelLowering.cpp | 11 ++ lib/Target/X86/X86InstrAVX512.td | 50 +++++++- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 + lib/Target/X86/X86IntrinsicsInfo.h | 4 +- test/CodeGen/X86/avx512dq-intrinsics.ll | 34 ++++++ test/MC/X86/x86-64-avx512dq.s | 144 ++++++++++++++++++++++++ 7 files changed, 247 insertions(+), 6 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 938c02c933f..1851e49de06 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1705,6 +1705,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_fpclassps512_mask">, Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_sd : + GCCBuiltin<"__builtin_ia32_fpclasssd">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ss : + GCCBuiltin<"__builtin_ia32_fpclassss">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; } // Vector extract sign mask diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b1bafba047c..a1137cf8bef 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16012,6 +16012,8 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, if (Op.getOpcode() == X86ISD::FSETCC) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); + if (Op.getOpcode() == X86ISD::VFPCLASS) + return DAG.getNode(ISD::OR, dl, VT, Op, IMask); if (PreservedSrc.getOpcode() == ISD::UNDEF) PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); @@ -16357,6 +16359,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } + case FPCLASSS: { + SDValue Src1 = Op.getOperand(1); + SDValue Imm = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm); + SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, + DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask); + } case CMP_MASK: case CMP_MASK_CC: { // Comparison intrinsics with masks. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 31a7e95df15..a5a904873e5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1803,6 +1803,42 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), // ---------------------------------------------------------------- // FPClass +//handle fpclass instruction mask = op(reg_scalar,imm) +// op(mem_scalar,imm) +multiclass avx512_scalar_fpclass opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + def rr : AVX512; + def rrk : AVX512, EVEX_K; + let mayLoad = 1, AddedComplexity = 20 in { + def rm : AVX512; + def rmk : AVX512, EVEX_K; + } + } +} + //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) // fpclass(reg_vec, mem_vec, imm) // fpclass(reg_vec, broadcast(eltVt), imm) @@ -1873,15 +1909,19 @@ multiclass avx512_vector_fpclass_all opcVec, - SDNode OpNode, Predicate prd>{ + bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ defm PS : avx512_vector_fpclass_all, EVEX_CD8<32, CD8VF>; + VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>; defm PD : avx512_vector_fpclass_all,EVEX_CD8<64, CD8VF> , VEX_W; + VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W; + defm SS : avx512_scalar_fpclass, EVEX_CD8<32, CD8VT1>; + defm SD : avx512_scalar_fpclass, EVEX_CD8<64, CD8VT1>, VEX_W; } -defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, X86Vfpclass, HasDQI>, - AVX512AIi8Base,EVEX; +defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, + X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX; //----------------------------------------------------------------- // Mask register copy, including diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ca67a2cff72..3eb5a6e5281 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -376,6 +376,8 @@ def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>; def X86Vfpclass : SDNode<"X86ISD::VFPCLASS", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisInt<2>]>, []>; +def X86Vfpclasss : SDNode<"X86ISD::VFPCLASS", SDTypeProfile<1, 2, [SDTCisInt<0>, + SDTCisFP<1>, SDTCisInt<2>]>,[]>; def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 74d6e4e6d73..2d8d9364c86 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -18,7 +18,7 @@ namespace llvm { enum IntrinsicType { INTR_NO_TYPE, - GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, + GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP, CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, @@ -688,6 +688,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0), + X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASS, 0), + X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM, X86ISD::FGETEXP_RND, 0), X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM, diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll index 7348229d35a..b36f1ef52c1 100644 --- a/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -467,3 +467,37 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) { %res2 = add i16 %res, %res1 ret i16 %res2 } + +declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vfpclasssd +; CHECK: %k0 {%k1} +; CHECK: vfpclasssd +; CHECK: %k0 +define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) { + %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1) + %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss +; CHECK-NOT: call +; CHECK: kmovw +; CHECK: vfpclassss +; CHECK: %k0 +; CHECK: {%k1} +; CHECK: kmovw +; CHECK: vfpclassss +; CHECK: %k0 +define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) { + %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1) + %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s index fef90032f68..5c9f579e97c 100644 --- a/test/MC/X86/x86-64-avx512dq.s +++ b/test/MC/X86/x86-64-avx512dq.s @@ -3847,3 +3847,147 @@ // CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm25 // CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0xf8,0xfb,0xff,0xff] vcvtuqq2ps -1032(%rdx){1to8}, %ymm25 + +// CHECK: vfpclasssd $171, %xmm28, %k4 +// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0xab] + vfpclasssd $0xab, %xmm28, %k4 + +// CHECK: vfpclasssd $171, %xmm28, %k4 {%k3} +// CHECK: encoding: [0x62,0x93,0xfd,0x0b,0x67,0xe4,0xab] + vfpclasssd $0xab, %xmm28, %k4 {%k3} + +// CHECK: vfpclasssd $123, %xmm28, %k4 +// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0x7b] + vfpclasssd $0x7b, %xmm28, %k4 + +// CHECK: vfpclasssd $123, (%rcx), %k4 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x21,0x7b] + vfpclasssd $0x7b,(%rcx), %k4 + +// CHECK: vfpclasssd $123, 291(%rax,%r14,8), %k4 +// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vfpclasssd $0x7b,291(%rax,%r14,8), %k4 + +// CHECK: vfpclasssd $123, 1016(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x7f,0x7b] + vfpclasssd $0x7b,1016(%rdx), %k4 + +// CHECK: vfpclasssd $123, 1024(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0x00,0x04,0x00,0x00,0x7b] + vfpclasssd $0x7b,1024(%rdx), %k4 + +// CHECK: vfpclasssd $123, -1024(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x80,0x7b] + vfpclasssd $0x7b,-1024(%rdx), %k4 + +// CHECK: vfpclasssd $123, -1032(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0xf8,0xfb,0xff,0xff,0x7b] + vfpclasssd $0x7b,-1032(%rdx), %k4 + +// CHECK: vfpclassss $171, %xmm26, %k5 +// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0xab] + vfpclassss $0xab, %xmm26, %k5 + +// CHECK: vfpclassss $171, %xmm26, %k5 {%k4} +// CHECK: encoding: [0x62,0x93,0x7d,0x0c,0x67,0xea,0xab] + vfpclassss $0xab, %xmm26, %k5 {%k4} + +// CHECK: vfpclassss $123, %xmm26, %k5 +// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0x7b] + vfpclassss $0x7b, %xmm26, %k5 + +// CHECK: vfpclassss $123, (%rcx), %k5 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x29,0x7b] + vfpclassss $0x7b,(%rcx), %k5 + +// CHECK: vfpclassss $123, 291(%rax,%r14,8), %k5 +// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x67,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vfpclassss $0x7b,291(%rax,%r14,8), %k5 + +// CHECK: vfpclassss $123, 508(%rdx), %k5 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x7f,0x7b] + vfpclassss $0x7b,508(%rdx), %k5 + +// CHECK: vfpclassss $123, 512(%rdx), %k5 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0x00,0x02,0x00,0x00,0x7b] + vfpclassss $0x7b,512(%rdx), %k5 + +// CHECK: vfpclassss $123, -512(%rdx), %k5 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x80,0x7b] + vfpclassss $0x7b,-512(%rdx), %k5 + +// CHECK: vfpclassss $123, -516(%rdx), %k5 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0xfc,0xfd,0xff,0xff,0x7b] + vfpclassss $0x7b,-516(%rdx), %k5 + +// CHECK: vfpclasssd $171, %xmm20, %k3 +// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0xab] + vfpclasssd $0xab, %xmm20, %k3 + +// CHECK: vfpclasssd $171, %xmm20, %k3 {%k6} +// CHECK: encoding: [0x62,0xb3,0xfd,0x0e,0x67,0xdc,0xab] + vfpclasssd $0xab, %xmm20, %k3 {%k6} + +// CHECK: vfpclasssd $123, %xmm20, %k3 +// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0x7b] + vfpclasssd $0x7b, %xmm20, %k3 + +// CHECK: vfpclasssd $123, (%rcx), %k3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x19,0x7b] + vfpclasssd $0x7b,(%rcx), %k3 + +// CHECK: vfpclasssd $123, 4660(%rax,%r14,8), %k3 +// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b] + vfpclasssd $0x7b,4660(%rax,%r14,8), %k3 + +// CHECK: vfpclasssd $123, 1016(%rdx), %k3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x7f,0x7b] + vfpclasssd $0x7b,1016(%rdx), %k3 + +// CHECK: vfpclasssd $123, 1024(%rdx), %k3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0x00,0x04,0x00,0x00,0x7b] + vfpclasssd $0x7b,1024(%rdx), %k3 + +// CHECK: vfpclasssd $123, -1024(%rdx), %k3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x80,0x7b] + vfpclasssd $0x7b,-1024(%rdx), %k3 + +// CHECK: vfpclasssd $123, -1032(%rdx), %k3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vfpclasssd $0x7b,-1032(%rdx), %k3 + +// CHECK: vfpclassss $171, %xmm28, %k4 +// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0xab] + vfpclassss $0xab, %xmm28, %k4 + +// CHECK: vfpclassss $171, %xmm28, %k4 {%k6} +// CHECK: encoding: [0x62,0x93,0x7d,0x0e,0x67,0xe4,0xab] + vfpclassss $0xab, %xmm28, %k4 {%k6} + +// CHECK: vfpclassss $123, %xmm28, %k4 +// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0x7b] + vfpclassss $0x7b, %xmm28, %k4 + +// CHECK: vfpclassss $123, (%rcx), %k4 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x21,0x7b] + vfpclassss $0x7b,(%rcx), %k4 + +// CHECK: vfpclassss $123, 4660(%rax,%r14,8), %k4 +// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x67,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b] + vfpclassss $0x7b,4660(%rax,%r14,8), %k4 + +// CHECK: vfpclassss $123, 508(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x7f,0x7b] + vfpclassss $0x7b,508(%rdx), %k4 + +// CHECK: vfpclassss $123, 512(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0x00,0x02,0x00,0x00,0x7b] + vfpclassss $0x7b,512(%rdx), %k4 + +// CHECK: vfpclassss $123, -512(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x80,0x7b] + vfpclassss $0x7b,-512(%rdx), %k4 + +// CHECK: vfpclassss $123, -516(%rdx), %k4 +// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vfpclassss $0x7b,-516(%rdx), %k4 -- 2.34.1