From: Elena Demikhovsky Date: Thu, 7 May 2015 11:24:42 +0000 (+0000) Subject: AVX-512: Added all forms of FP compare instructions for KNL and SKX. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d08d0340e51812a4b7093543aee9b999c2c6d3b8;p=oota-llvm.git AVX-512: Added all forms of FP compare instructions for KNL and SKX. Added intrinsics for the instructions. CC parameter of the intrinsics was changed from i8 to i32 according to the spec. By Igor Breger (igor.breger@intel.com) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236714 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 9a85a3374b0..43aa89871e8 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4183,13 +4183,33 @@ let TargetPrefix = "x86" in { } // Misc. let TargetPrefix = "x86" in { - def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">, - Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i8_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">, - Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">, + def int_x86_avx512_mask_cmp_ps_512 : + GCCBuiltin<"__builtin_ia32_cmpps512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_pd_512 : + GCCBuiltin<"__builtin_ia32_cmppd512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_ps_256 : + GCCBuiltin<"__builtin_ia32_cmpps256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_pd_256 : + 
GCCBuiltin<"__builtin_ia32_cmppd256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_ps_128 : + GCCBuiltin<"__builtin_ia32_cmpps128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_pd_128 : + GCCBuiltin<"__builtin_ia32_cmppd128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_movntdqa : + GCCBuiltin<"__builtin_ia32_movntdqa512">, Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>; } diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 7081c749058..dc576209e2d 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -210,14 +210,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name == "x86.avx2.mpsadbw") return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, NewFn); - - if (Name == "x86.avx512.mask.cmp.ps.512") - return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512, - NewFn); - if (Name == "x86.avx512.mask.cmp.pd.512") - return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512, - NewFn); - if (Name == "x86.avx512.mask.cmp.b.512") return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512, NewFn); @@ -799,21 +791,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; } - case Intrinsic::x86_avx512_mask_cmp_ps_512: - case Intrinsic::x86_avx512_mask_cmp_pd_512: { - // Need to truncate the last argument from i32 to i8 -- this argument models - // an inherently 8-bit immediate operand to these x86 instructions. - SmallVector Args(CI->arg_operands().begin(), - CI->arg_operands().end()); - - // Replace the last argument with a trunc. 
- Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc"); - - CallInst *NewCall = Builder.CreateCall(NewFn, Args); - CI->replaceAllUsesWith(NewCall); - CI->eraseFromParent(); - return; - } } } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 93c6ea02acc..244a94e9aa7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1414,7 +1414,8 @@ std::unique_ptr X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); - consumeToken(); // Eat "{" + // Eat "{" and mark the current place. + const SMLoc consumedToken = consumeToken(); if (Tok.getIdentifier().startswith("r")){ int rndMode = StringSwitch(Tok.getIdentifier()) .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) @@ -1436,6 +1437,13 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { MCConstantExpr::Create(rndMode, Parser.getContext()); return X86Operand::CreateImm(RndModeOp, Start, End); } + if(Tok.getIdentifier().equals("sae")){ + Parser.Lex(); // Eat the sae + if (!getLexer().is(AsmToken::RCurly)) + return ErrorOperand(Tok.getLoc(), "Expected } at this point"); + Parser.Lex(); // Eat "}" + return X86Operand::CreateToken("{sae}", consumedToken); + } return ErrorOperand(Tok.getLoc(), "unknown token in expression"); } /// ParseIntelMemOperand - Parse intel style memory operand. 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 49f0a8a73bf..5e23b5b9cf9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14926,12 +14926,27 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask.getValueType().getSizeInBits()); SDValue Cmp; if (IntrData->Type == CMP_MASK_CC) { - Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3)); + SDValue CC = Op.getOperand(3); + CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); + // We specify 2 possible opcodes for intrinsics with rounding modes. + // First, we check if the intrinsic may have non-default rounding mode, + // (IntrData->Opc1 != 0), then we check the rounding mode operand. + if (IntrData->Opc1 != 0) { + SDValue Rnd = Op.getOperand(5); + if (cast(Rnd)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), + Op.getOperand(2), CC, Rnd); + } + //default rounding mode + if(!Cmp.getNode()) + Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), + Op.getOperand(2), CC); + } else { assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!"); Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2)); + Op.getOperand(2)); } SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, DAG.getTargetConstant(0, dl, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0d24c6a7a56..d2593d53a36 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -308,6 +308,8 @@ namespace llvm { /// integer signed and unsigned data types. CMPM, CMPMU, + // Vector comparison with rounding mode for FP values + CMPM_RND, // Arithmetic operations with FLAGS results. 
ADD, SUB, ADC, SBB, SMUL, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 41f4584c2a4..bc9ee59ae49 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -305,8 +305,8 @@ multiclass AVX512_maskable_custom_cmp O, Format F, Pattern, itin>; def NAME#k: AVX512, EVEX_K; } @@ -335,6 +335,14 @@ multiclass AVX512_maskable_cmp O, Format F, X86VectorVTInfo _, (and _.KRCWM:$mask, RHS), Round, itin>; +multiclass AVX512_maskable_cmp_alt O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm> : + AVX512_maskable_custom_cmp; + // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion let Predicates = [HasAVX512] in { @@ -1590,53 +1598,97 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info, defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; -// avx512_cmp_packed - compare packed instructions -multiclass avx512_cmp_packed { - def rri : AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>; - let hasSideEffects = 0 in - def rrib: AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), - [], d>, EVEX_B; - def rmi : AVX512PIi8<0xC2, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"), - [(set KRC:$dst, - (X86cmpm (vt RC:$src1), (load addr:$src2), imm:$cc))], d>; +multiclass avx512_vcmp_common { + + defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), (ins _.RC:$src1, 
_.RC:$src2,AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (X86cmpm (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (X86cmpm (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)>; + + defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr, + (X86cmpm (_.VT _.RC:$src1), + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), + imm:$cc)>,EVEX_B; + } // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { - def rri_alt : AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; - def rrib_alt: AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"), - [], d>, EVEX_B; - let mayLoad = 1 in - def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; + defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">; + + let mayLoad = 1 in { + defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">; + + defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + 
(outs _.KRC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, ${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B; + } + } +} + +multiclass avx512_vcmp_sae { + // comparison code form (VCMP[EQ/LT/LE/...] + defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "{sae}, $src2, $src1", "$src1, $src2,{sae}", + (X86cmpmRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))>, EVEX_B; + + let isAsmParserOnly = 1, hasSideEffects = 0 in { + defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc,{sae}, $src2, $src1", + "$src1, $src2,{sae}, $cc">, EVEX_B; + } +} + +multiclass avx512_vcmp { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcmp_common<_.info512>, + avx512_vcmp_sae<_.info512>, EVEX_V512; + + } + let Predicates = [HasAVX512,HasVLX] in { + defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128; + defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256; } } -defm VCMPPSZ : avx512_cmp_packed, PS, EVEX_4V, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VCMPPDZ : avx512_cmp_packed, PD, EVEX_4V, VEX_W, EVEX_V512, - EVEX_CD8<64, CD8VF>; +defm VCMPPD : avx512_vcmp, + AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VCMPPS : avx512_vcmp, + AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), (COPY_TO_REGCLASS (VCMPPSZrri @@ -1654,30 +1706,7 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; -def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR16)>; - -def : 
Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR8)>; - -def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), - FROUND_CURRENT)), - (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR16)>; - -def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), - FROUND_CURRENT)), - (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR8)>; - +//----------------------------------------------------------------- // Mask register copy, including // - copy between mask registers // - load/store mask registers diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 56043fb0682..ed480ec60f5 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -747,6 +747,14 @@ class AVX512BIi8Base : PD { Domain ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; } +class AVX512PSIi8Base : PS { + Domain ExeDomain = SSEPackedSingle; + ImmType ImmT = Imm8; +} +class AVX512PDIi8Base : PD { + Domain ExeDomain = SSEPackedDouble; + ImmType ImmT = Imm8; +} class AVX512AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, TAPD, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 0bf9d1d80f7..497bdf65315 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -147,14 +147,21 @@ def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>; def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>; def X86CmpMaskCC : - SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVec<1>, - SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; + SDTypeProfile<1, 3, [SDTCisVec<0>, 
SDTCVecEltisVT<0, i1>, + SDTCisVec<1>, SDTCisSameAs<2, 1>, + SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>; +def X86CmpMaskCCRound : + SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>, + SDTCisVec<1>, SDTCisSameAs<2, 1>, + SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>, + SDTCisInt<4>]>; def X86CmpMaskCCScalar : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; -def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; -def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; +def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; +def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; +def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; def X86vshl : SDNode<"X86ISD::VSHL", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 7f6be8981cd..648769e7069 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -277,18 +277,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0), - 
X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, + X86ISD::CMPM_RND), + X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, + X86ISD::CMPM_RND), + X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::COMPRESS, 0), X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG, diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 59a9c71a336..d4129e14c95 100644 --- 
a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -392,17 +392,17 @@ declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) no define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02] - %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i8 2, i16 -1, i32 8) + %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) ret i16 %res } - declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i8, i16, i32) + declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04] - %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i8 4, i8 -1, i32 4) + %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4) ret i8 %res } - declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i8, i8, i32) + declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) ; cvt intrinsics define <16 x float> @test_cvtdq2ps(<16 x i32> %a) { diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index 4808ea9f854..e1f6276c6ef 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1,36 +1,37 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind { -; CHECK-LABEL: test1: -; CHECK: ## BB#0: -; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovaps %zmm0, 
%zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test1: +; KNL: ## BB#0: +; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1 +; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask = fcmp ole <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y ret <16 x float> %max } define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind { -; CHECK-LABEL: test2: -; CHECK: ## BB#0: -; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test2: +; KNL: ## BB#0: +; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1 +; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask = fcmp ole <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y ret <8 x double> %max } define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind { -; CHECK-LABEL: test3: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test3: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %y = load <16 x i32>, <16 x i32>* %yp, align 4 %mask = icmp eq <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -38,98 +39,120 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin } define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind { -; CHECK-LABEL: test4_unsigned: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test4_unsigned: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 +; KNL-NEXT: vmovdqa32 
%zmm2, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask = icmp uge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y ret <16 x i32> %max } define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: test5: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test5: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask = icmp eq <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y ret <8 x i64> %max } define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind { -; CHECK-LABEL: test6_unsigned: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test6_unsigned: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask = icmp ugt <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y ret <8 x i64> %max } define <4 x float> @test7(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test7: -; CHECK: ## BB#0: -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 -; CHECK-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test7: +; KNL: ## BB#0: +; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 +; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: retq +; SKX-LABEL: test7: +; SKX: ## BB#0: +; SKX: vxorps %xmm2, %xmm2, %xmm2 +; SKX: vcmpltps %xmm2, %xmm0, %k1 +; SKX: vmovaps %xmm0, %xmm1 {%k1} +; SKX: vmovaps %zmm1, %zmm0 +; SKX: retq + %mask = fcmp olt <4 x float> %a, zeroinitializer %c = select <4 x 
i1>%mask, <4 x float>%a, <4 x float>%b ret <4 x float>%c } define <2 x double> @test8(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: test8: -; CHECK: ## BB#0: -; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 -; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test8: +; KNL: ## BB#0: +; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 +; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: retq +; SKX-LABEL: test8: +; SKX: ## BB#0: +; SKX: vxorpd %xmm2, %xmm2, %xmm2 +; SKX: vcmpltpd %xmm2, %xmm0, %k1 +; SKX: vmovapd %xmm0, %xmm1 {%k1} +; SKX: vmovaps %zmm1, %zmm0 +; SKX: retq %mask = fcmp olt <2 x double> %a, zeroinitializer %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b ret <2 x double>%c } define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: test9: -; CHECK: ## BB#0: -; CHECK-NEXT: ## kill: YMM1 YMM1 ZMM1 -; CHECK-NEXT: ## kill: YMM0 YMM0 ZMM0 -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: ## kill: YMM0 YMM0 ZMM0 -; CHECK-NEXT: retq +; KNL-LABEL: test9: +; KNL: ## BB#0: +; KNL-NEXT: ## kill: YMM1 YMM1 ZMM1 +; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 +; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 +; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 +; KNL-NEXT: retq %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ret <8 x i32> %max } define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind { -; CHECK-LABEL: test10: -; CHECK: ## BB#0: -; CHECK-NEXT: ## kill: YMM1 YMM1 ZMM1 -; CHECK-NEXT: ## kill: YMM0 YMM0 ZMM0 -; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: ## kill: YMM0 YMM0 ZMM0 -; CHECK-NEXT: retq +; KNL-LABEL: test10: +; KNL: ## BB#0: +; KNL-NEXT: ## kill: YMM1 YMM1 ZMM1 +; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 +; KNL-NEXT: 
vcmpeqps %zmm1, %zmm0, %k1 +; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 +; KNL-NEXT: retq +; SKX-LABEL: test10: +; SKX: ## BB#0: +; SKX: vcmpeqps %ymm1, %ymm0, %k1 +; SKX: vmovaps %ymm0, %ymm1 {%k1} +; SKX: vmovaps %zmm1, %zmm0 +; SKX: retq + %mask = fcmp oeq <8 x float> %x, %y %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y ret <8 x float> %max } define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: test11_unsigned: -; CHECK: ## BB#0: -; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: retq +; KNL-LABEL: test11_unsigned: +; KNL: ## BB#0: +; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 +; KNL-NEXT: retq %mask = icmp ugt <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ret <8 x i32> %max @@ -137,25 +160,25 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind { define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind { -; CHECK-LABEL: test12: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 -; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 -; CHECK-NEXT: kunpckbw %k0, %k1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: ## kill: AX AX EAX -; CHECK-NEXT: retq +; KNL-LABEL: test12: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 +; KNL-NEXT: kunpckbw %k0, %k1, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: ## kill: AX AX EAX +; KNL-NEXT: retq %res = icmp eq <16 x i64> %a, %b %res1 = bitcast <16 x i1> %res to i16 ret i16 %res1 } define <16 x i32> @test13(<16 x float>%a, <16 x float>%b) -; CHECK-LABEL: test13: -; CHECK: ## BB#0: -; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} -; CHECK-NEXT: retq +; KNL-LABEL: test13: +; KNL: ## BB#0: +; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1 +; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-NEXT: retq { %cmpvector_i = fcmp oeq <16 x float> %a, %b %conv = zext <16 x i1> %cmpvector_i to <16 x 
i32> @@ -163,14 +186,14 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b) } define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) { -; CHECK-LABEL: test14: -; CHECK: ## BB#0: -; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1 -; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: knotw %k0, %k1 -; CHECK-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq +; KNL-LABEL: test14: +; KNL: ## BB#0: +; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1 +; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 +; KNL-NEXT: knotw %k0, %k0 +; KNL-NEXT: knotw %k0, %k1 +; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z} +; KNL-NEXT: retq %sub_r = sub <16 x i32> %a, %b %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32> @@ -180,14 +203,14 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) { } define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) { -; CHECK-LABEL: test15: -; CHECK: ## BB#0: -; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1 -; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: knotw %k0, %k1 -; CHECK-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq +; KNL-LABEL: test15: +; KNL: ## BB#0: +; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1 +; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; KNL-NEXT: knotw %k0, %k0 +; KNL-NEXT: knotw %k0, %k1 +; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z} +; KNL-NEXT: retq %sub_r = sub <8 x i64> %a, %b %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64> @@ -197,24 +220,24 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) { } define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind { -; CHECK-LABEL: test16: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test16: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1 +; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; KNL-NEXT: vmovaps 
%zmm1, %zmm0 +; KNL-NEXT: retq %mask = icmp sge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y ret <16 x i32> %max } define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind { -; CHECK-LABEL: test17: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test17: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sgt <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -222,12 +245,12 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou } define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind { -; CHECK-LABEL: test18: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test18: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sle <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -235,12 +258,12 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou } define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind { -; CHECK-LABEL: test19: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test19: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %y 
= load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp ule <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -248,13 +271,13 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou } define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind { -; CHECK-LABEL: test20: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test20: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 +; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask1 = icmp eq <16 x i32> %x1, %y1 %mask0 = icmp eq <16 x i32> %x, %y %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer @@ -263,13 +286,13 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3 } define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind { -; CHECK-LABEL: test21: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test21: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1 +; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} +; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1} +; KNL-NEXT: vmovaps %zmm2, %zmm0 +; KNL-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %mask0 = icmp sle <8 x i64> %x, %y %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer @@ -278,13 +301,13 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y } define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind { -; CHECK-LABEL: test22: -; CHECK: ## BB#0: -; 
CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 -; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test22: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 +; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} +; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask1 = icmp sgt <8 x i64> %x1, %y1 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4 %mask0 = icmp sgt <8 x i64> %x, %y @@ -294,13 +317,13 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6 } define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind { -; CHECK-LABEL: test23: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 -; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test23: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1 +; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask0 = icmp ule <16 x i32> %x, %y @@ -310,12 +333,12 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 } define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind { -; CHECK-LABEL: test24: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test24: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 %y = shufflevector <8 x i64> %y.0, 
<8 x i64> undef, <8 x i32> zeroinitializer @@ -325,12 +348,12 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind { } define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind { -; CHECK-LABEL: test25: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test25: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer @@ -340,13 +363,13 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind } define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind { -; CHECK-LABEL: test26: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 -; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq +; KNL-LABEL: test26: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1 +; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 @@ -358,13 +381,13 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32 } define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind { -; CHECK-LABEL: test27: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1 -; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; 
CHECK-NEXT: retq +; KNL-LABEL: test27: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1 +; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} +; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 @@ -375,10 +398,10 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y ret <8 x i64> %max } -; CHECK-LABEL: test28 -; CHECK: vpcmpgtq -; CHECK: vpcmpgtq -; CHECK: kxorw +; KNL-LABEL: test28 +; KNL: vpcmpgtq +; KNL: vpcmpgtq +; KNL: kxorw define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) { %x_gt_y = icmp sgt <8 x i64> %x, %y %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1 @@ -387,14 +410,188 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1 ret <8 x i32> %resse } -; CHECK-LABEL: test29 -; CHECK: vpcmpgtd -; CHECK: vpcmpgtd -; CHECK: kxnorw +; KNL-LABEL: test29 +; KNL: vpcmpgtd +; KNL: vpcmpgtd +; KNL: kxnorw define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) { %x_gt_y = icmp sgt <16 x i32> %x, %y %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1 %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1 %resse = sext <16 x i1>%res to <16 x i8> ret <16 x i8> %resse -} \ No newline at end of file +} + +define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind { +; SKX-LABEL: test30: +; SKX: vcmpeqpd %ymm1, %ymm0, %k1 +; SKX: vmovapd %ymm0, %ymm1 {%k1} + + %mask = fcmp oeq <4 x double> %x, %y + %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y + ret <4 x double> %max +} + +define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind { +; SKX-LABEL: test31: +; SKX: vcmpltpd (%rdi), %xmm0, %k1 +; SKX: vmovapd %xmm0, %xmm1 {%k1} + + %y = load <2 x double>, <2 x double>* %yp, align 4 + %mask = fcmp olt <2 x double> %x, %y + %max = select <2 x i1> %mask, <2 
x double> %x, <2 x double> %x1 + ret <2 x double> %max +} + +define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind { +; SKX-LABEL: test32: +; SKX: vcmpltpd (%rdi), %ymm0, %k1 +; SKX: vmovapd %ymm0, %ymm1 {%k1} + + %y = load <4 x double>, <4 x double>* %yp, align 4 + %mask = fcmp ogt <4 x double> %y, %x + %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1 + ret <4 x double> %max +} + +define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind { +; SKX-LABEL: test33: +; SKX: vcmpltpd (%rdi), %zmm0, %k1 +; SKX: vmovapd %zmm0, %zmm1 {%k1} + %y = load <8 x double>, <8 x double>* %yp, align 4 + %mask = fcmp olt <8 x double> %x, %y + %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1 + ret <8 x double> %max +} + +define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind { +; SKX-LABEL: test34: +; SKX: vcmpltps (%rdi), %xmm0, %k1 +; SKX: vmovaps %xmm0, %xmm1 {%k1} + %y = load <4 x float>, <4 x float>* %yp, align 4 + %mask = fcmp olt <4 x float> %x, %y + %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1 + ret <4 x float> %max +} + +define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind { +; SKX-LABEL: test35: +; SKX: vcmpltps (%rdi), %ymm0, %k1 +; SKX: vmovaps %ymm0, %ymm1 {%k1} + + %y = load <8 x float>, <8 x float>* %yp, align 4 + %mask = fcmp ogt <8 x float> %y, %x + %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1 + ret <8 x float> %max +} + +define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind { +; SKX-LABEL: test36: +; SKX: vcmpltps (%rdi), %zmm0, %k1 +; SKX: vmovaps %zmm0, %zmm1 {%k1} + %y = load <16 x float>, <16 x float>* %yp, align 4 + %mask = fcmp olt <16 x float> %x, %y + %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1 + ret <16 x float> %max +} + +define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) 
nounwind { +; SKX-LABEL: test37: +; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 +; SKX: vmovapd %zmm0, %zmm1 {%k1} + + %a = load double, double* %ptr + %v = insertelement <8 x double> undef, double %a, i32 0 + %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer + + %mask = fcmp ogt <8 x double> %shuffle, %x + %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1 + ret <8 x double> %max +} + +define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind { +; SKX-LABEL: test38: +; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1 +; SKX: vmovapd %ymm0, %ymm1 {%k1} + + %a = load double, double* %ptr + %v = insertelement <4 x double> undef, double %a, i32 0 + %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer + + %mask = fcmp ogt <4 x double> %shuffle, %x + %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1 + ret <4 x double> %max +} + +define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind { +; SKX-LABEL: test39: +; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1 +; SKX: vmovapd %xmm0, %xmm1 {%k1} + + %a = load double, double* %ptr + %v = insertelement <2 x double> undef, double %a, i32 0 + %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> + + %mask = fcmp ogt <2 x double> %shuffle, %x + %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1 + ret <2 x double> %max +} + + +define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind { +; SKX-LABEL: test40: +; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1 +; SKX: vmovaps %zmm0, %zmm1 {%k1} + + %a = load float, float* %ptr + %v = insertelement <16 x float> undef, float %a, i32 0 + %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + + %mask = fcmp ogt <16 x float> %shuffle, %x + %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1 + ret <16 x float> %max +} + +define <8 x float> @test41(<8 x float> %x, <8 x float> 
%x1, float* %ptr) nounwind { +; SKX-LABEL: test41: +; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1 +; SKX: vmovaps %ymm0, %ymm1 {%k1} + + %a = load float, float* %ptr + %v = insertelement <8 x float> undef, float %a, i32 0 + %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + + %mask = fcmp ogt <8 x float> %shuffle, %x + %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1 + ret <8 x float> %max +} + +define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind { +; SKX-LABEL: test42: +; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1 +; SKX: vmovaps %xmm0, %xmm1 {%k1} + + %a = load float, float* %ptr + %v = insertelement <4 x float> undef, float %a, i32 0 + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> + + %mask = fcmp ogt <4 x float> %shuffle, %x + %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1 + ret <4 x float> %max +} + +define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind { +; SKX-LABEL: test43: +; SKX: vpmovw2m %xmm2, %k1 +; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} +; SKX: vmovapd %zmm0, %zmm1 {%k1} + + %a = load double, double* %ptr + %v = insertelement <8 x double> undef, double %a, i32 0 + %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer + + %mask_cmp = fcmp ogt <8 x double> %shuffle, %x + %mask = and <8 x i1> %mask_cmp, %mask_in + %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1 + ret <8 x double> %max +} diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index 4db5df53848..64ec8166d50 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2261,3 +2261,31 @@ define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 } declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) + +define i8 @test_cmpps_256(<8 x float> %a, <8 x float> 
%b) { + ;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02] + %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1) + ret i8 %res + } + declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8) + +define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) { + ;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] + %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1) + ret i8 %res + } + declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8) + +define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) { + ;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02] + %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1) + ret i8 %res + } + declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8) + +define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) { + ;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02] + %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1) + ret i8 %res + } + declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8) diff --git a/test/MC/Disassembler/X86/avx-512.txt b/test/MC/Disassembler/X86/avx-512.txt index cfe5ffda2db..d618e7e5933 100644 --- a/test/MC/Disassembler/X86/avx-512.txt +++ b/test/MC/Disassembler/X86/avx-512.txt @@ -137,5 +137,5 @@ # CHECK: vpcmpd $8, %zmm10, %zmm25, %k5 0x62 0xd3 0x35 0x40 0x1f 0xea 0x8 -# CHECK: vcmppd {sae}, $127, %zmm27, %zmm11, %k4 +# CHECK: vcmppd $127,{sae}, %zmm27, %zmm11, %k4 0x62 0x91 0xa5 0x58 0xc2 0xe3 0x7f diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index fa74005d649..91107cc6fee 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -5983,3 +5983,132 @@ 
vpermilps 0x400(%rbx), %zmm2, %zmm3 // CHECK: vpermilpd // CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x05,0x53,0x10,0x23] vpermilpd $0x23, 0x400(%rbx), %zmm2 + +// CHECK: vcmppd $171, %zmm26, %zmm12, %k2 +// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab] + vcmppd $0xab, %zmm26, %zmm12, %k2 + +// CHECK: vcmppd $171, %zmm26, %zmm12, %k2 {%k3} +// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab] + vcmppd $0xab, %zmm26, %zmm12, %k2 {%k3} + +// CHECK: vcmppd $171,{sae}, %zmm26, %zmm12, %k2 +// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab] + vcmppd $0xab,{sae}, %zmm26, %zmm12, %k2 + +// CHECK: vcmppd $123, %zmm26, %zmm12, %k2 +// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b] + vcmppd $0x7b, %zmm26, %zmm12, %k2 + +// CHECK: vcmppd $123,{sae}, %zmm26, %zmm12, %k2 +// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b] + vcmppd $0x7b,{sae}, %zmm26, %zmm12, %k2 + +// CHECK: vcmppd $123, (%rcx), %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b] + vcmppd $0x7b, (%rcx), %zmm12, %k2 + +// CHECK: vcmppd $123, 291(%rax,%r14,8), %zmm12, %k2 +// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmppd $0x7b, 291(%rax,%r14,8), %zmm12, %k2 + +// CHECK: vcmppd $123, (%rcx){1to8}, %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b] + vcmppd $0x7b, (%rcx){1to8}, %zmm12, %k2 + +// CHECK: vcmppd $123, 8128(%rdx), %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b] + vcmppd $0x7b, 8128(%rdx), %zmm12, %k2 + +// CHECK: vcmppd $123, 8192(%rdx), %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b] + vcmppd $0x7b, 8192(%rdx), %zmm12, %k2 + +// CHECK: vcmppd $123, -8192(%rdx), %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b] + vcmppd $0x7b, -8192(%rdx), %zmm12, %k2 + +// CHECK: vcmppd $123, -8256(%rdx), %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b] + vcmppd $0x7b, 
-8256(%rdx), %zmm12, %k2 + +// CHECK: vcmppd $123, 1016(%rdx){1to8}, %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b] + vcmppd $0x7b, 1016(%rdx){1to8}, %zmm12, %k2 + +// CHECK: vcmppd $123, 1024(%rdx){1to8}, %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b] + vcmppd $0x7b, 1024(%rdx){1to8}, %zmm12, %k2 + +// CHECK: vcmppd $123, -1024(%rdx){1to8}, %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b] + vcmppd $0x7b, -1024(%rdx){1to8}, %zmm12, %k2 + +// CHECK: vcmppd $123, -1032(%rdx){1to8}, %zmm12, %k2 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b] + vcmppd $0x7b, -1032(%rdx){1to8}, %zmm12, %k2 + +// CHECK: vcmpps $171, %zmm22, %zmm17, %k2 +// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab] + vcmpps $0xab, %zmm22, %zmm17, %k2 + +// CHECK: vcmpps $171, %zmm22, %zmm17, %k2 {%k3} +// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab] + vcmpps $0xab, %zmm22, %zmm17, %k2 {%k3} + +// CHECK: vcmpps $171,{sae}, %zmm22, %zmm17, %k2 +// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab] + vcmpps $0xab,{sae}, %zmm22, %zmm17, %k2 + +// CHECK: vcmpps $123, %zmm22, %zmm17, %k2 +// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b] + vcmpps $0x7b, %zmm22, %zmm17, %k2 + +// CHECK: vcmpps $123,{sae}, %zmm22, %zmm17, %k2 +// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b] + vcmpps $0x7b,{sae}, %zmm22, %zmm17, %k2 + +// CHECK: vcmpps $123, (%rcx), %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b] + vcmpps $0x7b, (%rcx), %zmm17, %k2 + +// CHECK: vcmpps $123, 291(%rax,%r14,8), %zmm17, %k2 +// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmpps $0x7b, 291(%rax,%r14,8), %zmm17, %k2 + +// CHECK: vcmpps $123, (%rcx){1to16}, %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b] + vcmpps $0x7b, (%rcx){1to16}, %zmm17, %k2 + +// CHECK: vcmpps $123, 8128(%rdx), %zmm17, %k2 +// CHECK: 
encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b] + vcmpps $0x7b, 8128(%rdx), %zmm17, %k2 + +// CHECK: vcmpps $123, 8192(%rdx), %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b] + vcmpps $0x7b, 8192(%rdx), %zmm17, %k2 + +// CHECK: vcmpps $123, -8192(%rdx), %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b] + vcmpps $0x7b, -8192(%rdx), %zmm17, %k2 + +// CHECK: vcmpps $123, -8256(%rdx), %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b] + vcmpps $0x7b, -8256(%rdx), %zmm17, %k2 + +// CHECK: vcmpps $123, 508(%rdx){1to16}, %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b] + vcmpps $0x7b, 508(%rdx){1to16}, %zmm17, %k2 + +// CHECK: vcmpps $123, 512(%rdx){1to16}, %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b] + vcmpps $0x7b, 512(%rdx){1to16}, %zmm17, %k2 + +// CHECK: vcmpps $123, -512(%rdx){1to16}, %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b] + vcmpps $0x7b, -512(%rdx){1to16}, %zmm17, %k2 + +// CHECK: vcmpps $123, -516(%rdx){1to16}, %zmm17, %k2 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b] + vcmpps $0x7b, -516(%rdx){1to16}, %zmm17, %k2 + diff --git a/test/MC/X86/intel-syntax-avx512.s b/test/MC/X86/intel-syntax-avx512.s index af4e98ccba2..ffdbd20b77e 100644 --- a/test/MC/X86/intel-syntax-avx512.s +++ b/test/MC/X86/intel-syntax-avx512.s @@ -1,34 +1,175 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -mcpu=knl --show-encoding %s | FileCheck %s +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 -mcpu=knl --show-encoding %s | FileCheck %s -// CHECK: vaddps (%rax), %zmm1, %zmm1 +// CHECK: vaddps zmm1 , zmm1, zmmword ptr [rax] // CHECK: encoding: [0x62,0xf1,0x74,0x48,0x58,0x08] vaddps zmm1, zmm1, zmmword ptr [rax] -// CHECK: vaddpd %zmm2, %zmm1, %zmm1 +// CHECK: vaddpd zmm1 , zmm1, 
zmm2 // CHECK: encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca] vaddpd zmm1,zmm1,zmm2 -// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5} +// CHECK: vaddpd zmm1 {k5}, zmm1, zmm2 // CHECK: encoding: [0x62,0xf1,0xf5,0x4d,0x58,0xca] vaddpd zmm1{k5},zmm1,zmm2 -// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5} {z} +// CHECK: vaddpd zmm1 {k5} {z}, zmm1, zmm2 // CHECK: encoding: [0x62,0xf1,0xf5,0xcd,0x58,0xca] vaddpd zmm1{k5} {z},zmm1,zmm2 -// CHECK: vaddpd {rn-sae}, %zmm2, %zmm1, %zmm1 +// CHECK: vaddpd zmm1 , zmm1, zmm2, {rn-sae} // CHECK: encoding: [0x62,0xf1,0xf5,0x18,0x58,0xca] vaddpd zmm1,zmm1,zmm2,{rn-sae} -// CHECK: vaddpd {ru-sae}, %zmm2, %zmm1, %zmm1 +// CHECK: vaddpd zmm1 , zmm1, zmm2, {ru-sae} // CHECK: encoding: [0x62,0xf1,0xf5,0x58,0x58,0xca] vaddpd zmm1,zmm1,zmm2,{ru-sae} -// CHECK: vaddpd {rd-sae}, %zmm2, %zmm1, %zmm1 +// CHECK: vaddpd zmm1 , zmm1, zmm2, {rd-sae} // CHECK: encoding: [0x62,0xf1,0xf5,0x38,0x58,0xca] vaddpd zmm1,zmm1,zmm2,{rd-sae} -// CHECK: vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 +// CHECK: vaddpd zmm1 , zmm1, zmm2, {rz-sae} // CHECK: encoding: [0x62,0xf1,0xf5,0x78,0x58,0xca] vaddpd zmm1,zmm1,zmm2,{rz-sae} +// CHECK: vcmppd k2 , zmm12, zmm26, 171 +// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab] + vcmppd k2,zmm12,zmm26,0xab + +// CHECK: vcmppd k2 {k3}, zmm12, zmm26, 171 +// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab] + vcmppd k2{k3},zmm12,zmm26,0xab + +// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 171 +// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab] + vcmppd k2,zmm12,zmm26,{sae},0xab + +// CHECK: vcmppd k2 , zmm12, zmm26, 123 +// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b] + vcmppd k2 ,zmm12,zmm26,0x7b + +// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 123 +// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b] + vcmppd k2,zmm12,zmm26,{sae},0x7b + +// CHECK: vcmppd k2 , zmm12, zmmword ptr [rcx], 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b] + vcmppd k2,zmm12,zmmword PTR [rcx],0x7b + +// CHECK: vcmppd k2 , zmm12, zmmword 
ptr [rax + 8*r14 + 291], 123 +// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmppd k2 ,zmm12,zmmword PTR [rax+r14*8+0x123],0x7b + +// CHECK: vcmppd k2 , zmm12, qword ptr [rcx]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b] + vcmppd k2,zmm12,QWORD PTR [rcx]{1to8},0x7b + +// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8128], 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b] + vcmppd k2,zmm12,zmmword PTR [rdx+0x1fc0],0x7b + +// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8192], 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b] + vcmppd k2,zmm12,zmmword PTR [rdx+0x2000],0x7b + +// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8192], 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b] + vcmppd k2,zmm12,zmmword PTR [rdx-0x2000],0x7b + +// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8256], 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b] + vcmppd k2,zmm12,zmmword PTR [rdx-0x2040],0x7b + +// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1016]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b] + vcmppd k2,zmm12,QWORD PTR [rdx+0x3f8]{1to8},0x7b + +// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1024]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b] + vcmppd k2,zmm12,QWORD PTR [rdx+0x400]{1to8},0x7b + +// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1024]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b] + vcmppd k2,zmm12,QWORD PTR [rdx-0x400]{1to8},0x7b + +// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1032]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b] + vcmppd k2,zmm12,QWORD PTR [rdx-0x408]{1to8},0x7b + +// CHECK: vcmpps k2 , zmm17, zmm22, 171 +// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab] + vcmpps k2,zmm17,zmm22,0xab + +// CHECK: vcmpps k2 {k3}, zmm17, zmm22, 171 +// CHECK: encoding: 
[0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab] + vcmpps k2{k3},zmm17,zmm22,0xab + +// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 171 +// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab] + vcmpps k2,zmm17,zmm22,{sae},0xab + +// CHECK: vcmpps k2 , zmm17, zmm22, 123 +// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b] + vcmpps k2,zmm17,zmm22,0x7b + +// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 123 +// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b] + vcmpps k2,zmm17,zmm22,{sae},0x7b + +// CHECK: vcmpps k2 , zmm17, zmmword ptr [rcx], 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b] + vcmpps k2,zmm17,zmmword PTR [rcx],0x7b + +// CHECK: vcmpps k2 , zmm17, zmmword ptr [rax + 8*r14 + 291], 123 +// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmpps k2,zmm17,zmmword PTR [rax+r14*8+0x123],0x7b + +// CHECK: vcmpps k2 , zmm17, dword ptr [rcx]{1to16}, 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b] + vcmpps k2,zmm17,DWORD PTR [rcx]{1to16},0x7b + +// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8128], 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b] + vcmpps k2,zmm17,zmmword PTR [rdx+0x1fc0],0x7b + +// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8192], 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b] + vcmpps k2,zmm17,zmmword PTR [rdx+0x2000],0x7b + +// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8192], 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b] + vcmpps k2,zmm17,zmmword PTR [rdx-0x2000],0x7b + +// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8256], 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b] + vcmpps k2,zmm17,zmmword PTR [rdx-0x2040],0x7b + +// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 508]{1to16}, 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b] + vcmpps k2,zmm17,DWORD PTR [rdx+0x1fc]{1to16},0x7b + +// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 512]{1to16}, 123 +// CHECK: encoding: 
[0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b] + vcmpps k2,zmm17,DWORD PTR [rdx+0x200]{1to16},0x7b + +// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 512]{1to16}, 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b] + vcmpps k2,zmm17,DWORD PTR [rdx-0x200]{1to16},0x7b + +// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 516]{1to16}, 123 +// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b] + vcmpps k2,zmm17,DWORD PTR [rdx-0x204]{1to16},0x7b + + + + + + + + + + + + + + diff --git a/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s b/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s new file mode 100644 index 00000000000..6de59da40b3 --- /dev/null +++ b/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s @@ -0,0 +1,225 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vcmppd k3 , xmm27, xmm23, 171 +// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab] + vcmppd k3,xmm27,xmm23,0xab + +// CHECK: vcmppd k3 {k5}, xmm27, xmm23, 171 +// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab] + vcmppd k3{k5},xmm27,xmm23,0xab + +// CHECK: vcmppd k3 , xmm27, xmm23, 123 +// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b] + vcmppd k3,xmm27,xmm23,0x7b + +// CHECK: vcmppd k3 , xmm27, xmmword ptr [rcx], 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b] + vcmppd k3,xmm27,XMMWORD PTR [rcx],0x7b + +// CHECK: vcmppd k3 , xmm27, xmmword ptr [rax + 8*r14 + 291], 123 +// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmppd k3,xmm27,XMMWORD PTR [rax+r14*8+0x123],0x7b + +// CHECK: vcmppd k3 , xmm27, qword ptr [rcx]{1to2}, 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b] + vcmppd k3,xmm27,QWORD PTR [rcx]{1to2},0x7b + +// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2032], 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b] + vcmppd k3,xmm27,XMMWORD PTR [rdx+0x7f0],0x7b + 
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2048], 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b] + vcmppd k3,xmm27,XMMWORD PTR [rdx+0x800],0x7b + +// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2048], 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b] + vcmppd k3,xmm27,XMMWORD PTR [rdx-0x800],0x7b + +// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2064], 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b] + vcmppd k3,xmm27,XMMWORD PTR [rdx-0x810],0x7b + +// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1016]{1to2}, 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b] + vcmppd k3,xmm27,QWORD PTR [rdx+0x3f8]{1to2},0x7b + +// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1024]{1to2}, 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b] + vcmppd k3,xmm27,QWORD PTR [rdx+0x400]{1to2},0x7b + +// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1024]{1to2}, 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b] + vcmppd k3,xmm27,QWORD PTR [rdx-0x400]{1to2},0x7b + +// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1032]{1to2}, 123 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vcmppd k3,xmm27,QWORD PTR [rdx-0x408]{1to2},0x7b + +// CHECK: vcmppd k4 , ymm17, ymm27, 171 +// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab] + vcmppd k4,ymm17,ymm27,0xab + +// CHECK: vcmppd k4 {k7}, ymm17, ymm27, 171 +// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab] + vcmppd k4{k7},ymm17,ymm27,0xab + +// CHECK: vcmppd k4 , ymm17, ymm27, 123 +// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b] + vcmppd k4,ymm17,ymm27,0x7b + +// CHECK: vcmppd k4 , ymm17, ymmword ptr [rcx], 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b] + vcmppd k4,ymm17,YMMWORD PTR [rcx],0x7b + +// CHECK: vcmppd k4 , ymm17, ymmword ptr [rax + 8*r14 + 291], 123 +// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + 
vcmppd k4,ymm17,YMMWORD PTR [rax+r14*8+0x123],0x7b + +// CHECK: vcmppd k4 , ymm17, qword ptr [rcx]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b] + vcmppd k4,ymm17,QWORD PTR [rcx]{1to4},0x7b + +// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4064], 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b] + vcmppd k4,ymm17,YMMWORD PTR [rdx+0xfe0],0x7b + +// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4096], 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b] + vcmppd k4,ymm17,YMMWORD PTR [rdx+0x1000],0x7b + +// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4096], 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b] + vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1000],0x7b + +// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4128], 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1020],0x7b + +// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1016]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b] + vcmppd k4,ymm17,QWORD PTR [rdx+0x3f8]{1to4},0x7b + +// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1024]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b] + vcmppd k4,ymm17,QWORD PTR [rdx+0x400]{1to4},0x7b + +// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1024]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b] + vcmppd k4,ymm17,QWORD PTR [rdx-0x400]{1to4},0x7b + +// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1032]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b] + vcmppd k4,ymm17,QWORD PTR [rdx-0x408]{1to4},0x7b + +// CHECK: vcmpps k4 , xmm29, xmm28, 171 +// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab] + vcmpps k4,xmm29,xmm28,0xab + +// CHECK: vcmpps k4 {k2}, xmm29, xmm28, 171 +// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab] + vcmpps k4{k2},xmm29,xmm28,0xab + +// CHECK: vcmpps k4 , xmm29, xmm28, 123 +// 
CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b] + vcmpps k4,xmm29,xmm28,0x7b + +// CHECK: vcmpps k4 , xmm29, xmmword ptr [rcx], 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b] + vcmpps k4,xmm29,XMMWORD PTR [rcx],0x7b + +// CHECK: vcmpps k4 , xmm29, xmmword ptr [rax + 8*r14 + 291], 123 +// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmpps k4,xmm29,XMMWORD PTR [rax+r14*8+0x123],0x7b + +// CHECK: vcmpps k4 , xmm29, dword ptr [rcx]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b] + vcmpps k4,xmm29,DWORD PTR [rcx]{1to4},0x7b + +// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2032], 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b] + vcmpps k4,xmm29,XMMWORD PTR [rdx+0x7f0],0x7b + +// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2048], 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b] + vcmpps k4,xmm29,XMMWORD PTR [rdx+0x800],0x7b + +// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2048], 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b] + vcmpps k4,xmm29,XMMWORD PTR [rdx-0x800],0x7b + +// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2064], 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b] + vcmpps k4,xmm29,XMMWORD PTR [rdx-0x810],0x7b + +// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 508]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b] + vcmpps k4,xmm29,DWORD PTR [rdx+0x1fc]{1to4},0x7b + +// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 512]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b] + vcmpps k4,xmm29,DWORD PTR [rdx+0x200]{1to4},0x7b + +// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 512]{1to4}, 123 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b] + vcmpps k4,xmm29,DWORD PTR [rdx-0x200]{1to4},0x7b + +// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 516]{1to4}, 123 +// CHECK: encoding: 
[0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vcmpps k4,xmm29,DWORD PTR [rdx-0x204]{1to4},0x7b + +// CHECK: vcmpps k4 , ymm19, ymm18, 171 +// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab] + vcmpps k4,ymm19,ymm18,0xab + +// CHECK: vcmpps k4 {k1}, ymm19, ymm18, 171 +// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab] + vcmpps k4{k1},ymm19,ymm18,0xab + +// CHECK: vcmpps k4 , ymm19, ymm18, 123 +// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b] + vcmpps k4,ymm19,ymm18,0x7b + +// CHECK: vcmpps k4 , ymm19, ymmword ptr [rcx], 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b] + vcmpps k4,ymm19,YMMWORD PTR [rcx],0x7b + +// CHECK: vcmpps k4 , ymm19, ymmword ptr [rax + 8*r14 + 291], 123 +// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmpps k4,ymm19,YMMWORD PTR [rax+r14*8+0x123],0x7b + +// CHECK: vcmpps k4 , ymm19, dword ptr [rcx]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b] + vcmpps k4,ymm19,DWORD PTR [rcx]{1to8},0x7b + +// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4064], 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b] + vcmpps k4,ymm19,YMMWORD PTR [rdx+0xfe0],0x7b + +// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4096], 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b] + vcmpps k4,ymm19,YMMWORD PTR [rdx+0x1000],0x7b + +// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4096], 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b] + vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1000],0x7b + +// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4128], 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1020],0x7b + +// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 508]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b] + vcmpps k4,ymm19,DWORD PTR [rdx+0x1fc]{1to8},0x7b + +// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 512]{1to8}, 123 +// 
CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b] + vcmpps k4,ymm19,DWORD PTR [rdx+0x200]{1to8},0x7b + +// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 512]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b] + vcmpps k4,ymm19,DWORD PTR [rdx-0x200]{1to8},0x7b + +// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 516]{1to8}, 123 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vcmpps k4,ymm19,DWORD PTR [rdx-0x204]{1to8},0x7b diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index 837b030646c..7839f74d168 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -9179,3 +9179,237 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19 // CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0xf8,0xfb,0xff,0xff] vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19 + +// CHECK: vcmppd $171, %xmm23, %xmm27, %k3 +// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab] + vcmppd $0xab, %xmm23, %xmm27, %k3 + +// CHECK: vcmppd $171, %xmm23, %xmm27, %k3 {%k5} +// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab] + vcmppd $0xab, %xmm23, %xmm27, %k3 {%k5} + +// CHECK: vcmppd $123, %xmm23, %xmm27, %k3 +// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b] + vcmppd $0x7b, %xmm23, %xmm27, %k3 + +// CHECK: vcmppd $123, (%rcx), %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b] + vcmppd $0x7b, (%rcx), %xmm27, %k3 + +// CHECK: vcmppd $123, 291(%rax,%r14,8), %xmm27, %k3 +// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmppd $0x7b, 291(%rax,%r14,8), %xmm27, %k3 + +// CHECK: vcmppd $123, (%rcx){1to2}, %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b] + vcmppd $0x7b, (%rcx){1to2}, %xmm27, %k3 + +// CHECK: vcmppd $123, 2032(%rdx), %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b] + vcmppd $0x7b, 2032(%rdx), %xmm27, %k3 + +// CHECK: vcmppd 
$123, 2048(%rdx), %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b] + vcmppd $0x7b, 2048(%rdx), %xmm27, %k3 + +// CHECK: vcmppd $123, -2048(%rdx), %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b] + vcmppd $0x7b, -2048(%rdx), %xmm27, %k3 + +// CHECK: vcmppd $123, -2064(%rdx), %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b] + vcmppd $0x7b, -2064(%rdx), %xmm27, %k3 + +// CHECK: vcmppd $123, 1016(%rdx){1to2}, %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b] + vcmppd $0x7b, 1016(%rdx){1to2}, %xmm27, %k3 + +// CHECK: vcmppd $123, 1024(%rdx){1to2}, %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b] + vcmppd $0x7b, 1024(%rdx){1to2}, %xmm27, %k3 + +// CHECK: vcmppd $123, -1024(%rdx){1to2}, %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b] + vcmppd $0x7b, -1024(%rdx){1to2}, %xmm27, %k3 + +// CHECK: vcmppd $123, -1032(%rdx){1to2}, %xmm27, %k3 +// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vcmppd $0x7b, -1032(%rdx){1to2}, %xmm27, %k3 + +// CHECK: vcmppd $171, %ymm27, %ymm17, %k4 +// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab] + vcmppd $0xab, %ymm27, %ymm17, %k4 + +// CHECK: vcmppd $171, %ymm27, %ymm17, %k4 {%k7} +// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab] + vcmppd $0xab, %ymm27, %ymm17, %k4 {%k7} + +// CHECK: vcmppd $123, %ymm27, %ymm17, %k4 +// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b] + vcmppd $0x7b, %ymm27, %ymm17, %k4 + +// CHECK: vcmppd $123, (%rcx), %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b] + vcmppd $0x7b, (%rcx), %ymm17, %k4 + +// CHECK: vcmppd $123, 291(%rax,%r14,8), %ymm17, %k4 +// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmppd $0x7b, 291(%rax,%r14,8), %ymm17, %k4 + +// CHECK: vcmppd $123, (%rcx){1to4}, %ymm17, %k4 +// CHECK: encoding: 
[0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b] + vcmppd $0x7b, (%rcx){1to4}, %ymm17, %k4 + +// CHECK: vcmppd $123, 4064(%rdx), %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b] + vcmppd $123, 4064(%rdx), %ymm17, %k4 + +// CHECK: vcmppd $123, 4096(%rdx), %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b] + vcmppd $0x7b, 4096(%rdx), %ymm17, %k4 + +// CHECK: vcmppd $123, -4096(%rdx), %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b] + vcmppd $0x7b, -4096(%rdx), %ymm17, %k4 + +// CHECK: vcmppd $123, -4128(%rdx), %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vcmppd $0x7b, -4128(%rdx), %ymm17, %k4 + +// CHECK: vcmppd $123, 1016(%rdx){1to4}, %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b] + vcmppd $0x7b, 1016(%rdx){1to4}, %ymm17, %k4 + +// CHECK: vcmppd $123, 1024(%rdx){1to4}, %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b] + vcmppd $0x7b, 1024(%rdx){1to4}, %ymm17, %k4 + +// CHECK: vcmppd $123, -1024(%rdx){1to4}, %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b] + vcmppd $0x7b, -1024(%rdx){1to4}, %ymm17, %k4 + +// CHECK: vcmppd $123, -1032(%rdx){1to4}, %ymm17, %k4 +// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b] + vcmppd $0x7b, -1032(%rdx){1to4}, %ymm17, %k4 + +// CHECK: vcmpps $171, %xmm28, %xmm29, %k4 +// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab] + vcmpps $0xab, %xmm28, %xmm29, %k4 + +// CHECK: vcmpps $171, %xmm28, %xmm29, %k4 {%k2} +// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab] + vcmpps $0xab, %xmm28, %xmm29, %k4 {%k2} + +// CHECK: vcmpps $123, %xmm28, %xmm29, %k4 +// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b] + vcmpps $0x7b, %xmm28, %xmm29, %k4 + +// CHECK: vcmpps $123, (%rcx), %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b] + vcmpps $0x7b, (%rcx), %xmm29, %k4 + +// 
CHECK: vcmpps $123, 291(%rax,%r14,8), %xmm29, %k4 +// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmpps $0x7b, 291(%rax,%r14,8), %xmm29, %k4 + +// CHECK: vcmpps $123, (%rcx){1to4}, %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b] + vcmpps $0x7b, (%rcx){1to4}, %xmm29, %k4 + +// CHECK: vcmpps $123, 2032(%rdx), %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b] + vcmpps $0x7b, 2032(%rdx), %xmm29, %k4 + +// CHECK: vcmpps $123, 2048(%rdx), %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b] + vcmpps $0x7b, 2048(%rdx), %xmm29, %k4 + +// CHECK: vcmpps $123, -2048(%rdx), %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b] + vcmpps $0x7b, -2048(%rdx), %xmm29, %k4 + +// CHECK: vcmpps $123, -2064(%rdx), %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b] + vcmpps $0x7b, -2064(%rdx), %xmm29, %k4 + +// CHECK: vcmpps $123, 508(%rdx){1to4}, %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b] + vcmpps $123, 508(%rdx){1to4}, %xmm29, %k4 + +// CHECK: vcmpps $123, 512(%rdx){1to4}, %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b] + vcmpps $123, 512(%rdx){1to4}, %xmm29, %k4 + +// CHECK: vcmpps $123, -512(%rdx){1to4}, %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b] + vcmpps $0x7b, -512(%rdx){1to4}, %xmm29, %k4 + +// CHECK: vcmpps $123, -516(%rdx){1to4}, %xmm29, %k4 +// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vcmpps $0x7b, -516(%rdx){1to4}, %xmm29, %k4 + +// CHECK: vcmpps $171, %ymm18, %ymm19, %k4 +// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab] + vcmpps $0xab, %ymm18, %ymm19, %k4 + +// CHECK: vcmpps $171, %ymm18, %ymm19, %k4 {%k1} +// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab] + vcmpps $0xab, %ymm18, %ymm19, %k4 {%k1} + +// CHECK: vcmpps $123, %ymm18, %ymm19, %k4 
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b] + vcmpps $0x7b, %ymm18, %ymm19, %k4 + +// CHECK: vcmpps $123, (%rcx), %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b] + vcmpps $0x7b, (%rcx), %ymm19, %k4 + +// CHECK: vcmpps $123, 291(%rax,%r14,8), %ymm19, %k4 +// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcmpps $0x7b, 291(%rax,%r14,8), %ymm19, %k4 + +// CHECK: vcmpps $123, (%rcx){1to8}, %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b] + vcmpps $0x7b, (%rcx){1to8}, %ymm19, %k4 + +// CHECK: vcmpps $123, 4064(%rdx), %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b] + vcmpps $0x7b, 4064(%rdx), %ymm19, %k4 + +// CHECK: vcmpps $123, 4096(%rdx), %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b] + vcmpps $0x7b, 4096(%rdx), %ymm19, %k4 + +// CHECK: vcmpps $123, -4096(%rdx), %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b] + vcmpps $0x7b, -4096(%rdx), %ymm19, %k4 + +// CHECK: vcmpps $123, -4128(%rdx), %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vcmpps $0x7b, -4128(%rdx), %ymm19, %k4 + +// CHECK: vcmpps $123, 508(%rdx){1to8}, %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b] + vcmpps $0x7b, 508(%rdx){1to8}, %ymm19, %k4 + +// CHECK: vcmpps $123, 512(%rdx){1to8}, %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b] + vcmpps $0x7b, 512(%rdx){1to8}, %ymm19, %k4 + +// CHECK: vcmpps $123, -512(%rdx){1to8}, %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b] + vcmpps $0x7b, -512(%rdx){1to8}, %ymm19, %k4 + +// CHECK: vcmpps $123, -516(%rdx){1to8}, %ymm19, %k4 +// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vcmpps $0x7b, -516(%rdx){1to8}, %ymm19, %k4 + + + + + + + + + +