// Compares
let TargetPrefix = "x86" in {
// 512-bit
- def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">,
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
DAG.getConstant(X86CC, dl, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- case COMI_RM: { // Comparison intrinsics with Sae
- SDValue LHS = Op.getOperand(1);
- SDValue RHS = Op.getOperand(2);
- SDValue CC = Op.getOperand(3);
- SDValue Sae = Op.getOperand(4);
- auto X86CC = TranslateX86ConstCondToX86CC(CC);
- unsigned comiOp = std::get<0>(X86CC) ? IntrData->Opc0 : IntrData->Opc1;
- SDValue Cond;
- if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
- X86::STATIC_ROUNDING::CUR_DIRECTION)
- Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
- else
- Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(std::get<1>(X86CC), dl, MVT::i8), Cond);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
- }
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
}
-
-// Unordered/Ordered scalar fp compare with Sea and set EFLAGS
-multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode,
- string OpcodeStr> {
- def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
- !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
- [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2,
- (i32 FROUND_NO_EXC)))],
- IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
- Sched<[WriteFAdd]>;
-}
-
-let Defs = [EFLAGS], Predicates = [HasAVX512] in {
- defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">,
- AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">,
- AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
- defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">,
- AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">,
- AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
-}
-
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, EVEX, VEX_LIG,
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
SDTCisVec<1>]>;
-def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
- SDTCisSameAs<1, 2>, SDTCisInt<3>]>;
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
-def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
- CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, COMI_RM,
+ CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
- X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
- X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
"Intrinsic data tables should be sorted by Intrinsic ID");
}
-/*
-* Get comparison modifier from _mm_comi_round_sd/ss intrinsic
-* Return tuple <isOrdered, X86 condcode>
-*/
-static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
- ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
- unsigned IntImm = CImm->getZExtValue();
- // On a floating point condition, the flags are set as follows:
- // ZF PF CF op
- // 0 | 0 | 0 | X > Y
- // 0 | 0 | 1 | X < Y
- // 1 | 0 | 0 | X == Y
- // 1 | 1 | 1 | unordered
- switch (IntImm) {
- default: llvm_unreachable("Invalid floating point compare value for Comi!");
- case _CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling)
- case _CMP_EQ_OS: // 0x10 - Equal (ordered, signaling)
- return std::make_tuple(true, X86::COND_E);
- case _CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling)
- case _CMP_EQ_US: // 0x18 - Equal (unordered, signaling)
- return std::make_tuple(false , X86::COND_E);
- case _CMP_LT_OS: // 0x01 - Less-than (ordered, signaling)
- case _CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_B);
- case _CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling)
- case _CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
- return std::make_tuple(false , X86::COND_B);
- case _CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling)
- case _CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_BE);
- case _CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling)
- case _CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_BE);
- case _CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling)
- case _CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_A);
- case _CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered,signaling)
- case _CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_A);
- case _CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling)
- case _CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_AE);
- case _CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling)
- case _CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_AE);
- case _CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling)
- case _CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling)
- return std::make_tuple(true, X86::COND_NE);
- case _CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling)
- case _CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling)
- return std::make_tuple(false, X86::COND_NE);
- }
-}
-
} // End llvm namespace
#endif
ret <8 x i64> %res2
}
-define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae
-; CHECK: vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sete %al
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae
-; CHECK: vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sete %al
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_comi_sd_eq
-; CHECK: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq
-; CHECK: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae
-; CHECK: vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae
-; CHECK: vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_comi_sd_lt
-; CHECK: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt
-; CHECK: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
- ret i32 %res
-}
-
-declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
-
-define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
-; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt
-; CHECK: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
- %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
- ret i32 %res
-}
-
-declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff]
vmovd %xmm5, -516(%rdx)
-// CHECK: vcomisd %xmm21, %xmm23
-// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2f,0xfd]
- vcomisd %xmm21, %xmm23
-
-// CHECK: vcomisd {sae}, %xmm21, %xmm23
-// CHECK: encoding: [0x62,0xa1,0xfd,0x18,0x2f,0xfd]
- vcomisd {sae}, %xmm21, %xmm23
-
-// CHECK: vcomisd (%rcx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x39]
- vcomisd (%rcx), %xmm23
-
-// CHECK: vcomisd 291(%rax,%r14,8), %xmm23
-// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2f,0xbc,0xf0,0x23,0x01,0x00,0x00]
- vcomisd 291(%rax,%r14,8), %xmm23
-
-// CHECK: vcomisd 1016(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x7a,0x7f]
- vcomisd 1016(%rdx), %xmm23
-
-// CHECK: vcomisd 1024(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0xba,0x00,0x04,0x00,0x00]
- vcomisd 1024(%rdx), %xmm23
-
-// CHECK: vcomisd -1024(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x7a,0x80]
- vcomisd -1024(%rdx), %xmm23
-
-// CHECK: vcomisd -1032(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0xba,0xf8,0xfb,0xff,0xff]
- vcomisd -1032(%rdx), %xmm23
-
-// CHECK: vcomiss %xmm28, %xmm14
-// CHECK: encoding: [0x62,0x11,0x7c,0x08,0x2f,0xf4]
- vcomiss %xmm28, %xmm14
-
-// CHECK: vcomiss {sae}, %xmm28, %xmm14
-// CHECK: encoding: [0x62,0x11,0x7c,0x18,0x2f,0xf4]
- vcomiss {sae}, %xmm28, %xmm14
-
-// CHECK: vcomiss (%rcx), %xmm14
-// CHECK: encoding: [0xc5,0x78,0x2f,0x31]
- vcomiss (%rcx), %xmm14
-
-// CHECK: vcomiss 291(%rax,%r14,8), %xmm14
-// CHECK: encoding: [0xc4,0x21,0x78,0x2f,0xb4,0xf0,0x23,0x01,0x00,0x00]
- vcomiss 291(%rax,%r14,8), %xmm14
-
-// CHECK: vcomiss 508(%rdx), %xmm14
-// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0xfc,0x01,0x00,0x00]
- vcomiss 508(%rdx), %xmm14
-
-// CHECK: vcomiss 512(%rdx), %xmm14
-// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0x00,0x02,0x00,0x00]
- vcomiss 512(%rdx), %xmm14
-
-// CHECK: vcomiss -512(%rdx), %xmm14
-// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0x00,0xfe,0xff,0xff]
- vcomiss -512(%rdx), %xmm14
-
-// CHECK: vcomiss -516(%rdx), %xmm14
-// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0xfc,0xfd,0xff,0xff]
- vcomiss -516(%rdx), %xmm14
-
-// CHECK: vucomisd %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xda]
- vucomisd %xmm10, %xmm11
-
-// CHECK: vucomisd {sae}, %xmm10, %xmm11
-// CHECK: encoding: [0x62,0x51,0xfd,0x18,0x2e,0xda]
- vucomisd {sae}, %xmm10, %xmm11
-
-// CHECK: vucomisd (%rcx), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x2e,0x19]
- vucomisd (%rcx), %xmm11
-
-// CHECK: vucomisd 291(%rax,%r14,8), %xmm11
-// CHECK: encoding: [0xc4,0x21,0x79,0x2e,0x9c,0xf0,0x23,0x01,0x00,0x00]
- vucomisd 291(%rax,%r14,8), %xmm11
-
-// CHECK: vucomisd 1016(%rdx), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0xf8,0x03,0x00,0x00]
- vucomisd 1016(%rdx), %xmm11
-
-// CHECK: vucomisd 1024(%rdx), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0x00,0x04,0x00,0x00]
- vucomisd 1024(%rdx), %xmm11
-
-// CHECK: vucomisd -1024(%rdx), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0x00,0xfc,0xff,0xff]
- vucomisd -1024(%rdx), %xmm11
-
-// CHECK: vucomisd -1032(%rdx), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0xf8,0xfb,0xff,0xff]
- vucomisd -1032(%rdx), %xmm11
-
-// CHECK: vucomiss %xmm11, %xmm22
-// CHECK: encoding: [0x62,0xc1,0x7c,0x08,0x2e,0xf3]
- vucomiss %xmm11, %xmm22
-
-// CHECK: vucomiss {sae}, %xmm11, %xmm22
-// CHECK: encoding: [0x62,0xc1,0x7c,0x18,0x2e,0xf3]
- vucomiss {sae}, %xmm11, %xmm22
-
-// CHECK: vucomiss (%rcx), %xmm22
-// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x31]
- vucomiss (%rcx), %xmm22
-
-// CHECK: vucomiss 291(%rax,%r14,8), %xmm22
-// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x2e,0xb4,0xf0,0x23,0x01,0x00,0x00]
- vucomiss 291(%rax,%r14,8), %xmm22
-
-// CHECK: vucomiss 508(%rdx), %xmm22
-// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x72,0x7f]
- vucomiss 508(%rdx), %xmm22
-
-// CHECK: vucomiss 512(%rdx), %xmm22
-// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0x00,0x02,0x00,0x00]
- vucomiss 512(%rdx), %xmm22
-
-// CHECK: vucomiss -512(%rdx), %xmm22
-// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x72,0x80]
- vucomiss -512(%rdx), %xmm22
-
-// CHECK: vucomiss -516(%rdx), %xmm22
-// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0xfc,0xfd,0xff,0xff]
- vucomiss -516(%rdx), %xmm22