}
// Misc.
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
- Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i8_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
- Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
+ def int_x86_avx512_mask_cmp_ps_512 : // 512-bit: the third operand (cc) is now i32 (was i8); the trailing i32 is read as the rounding-mode operand by the lowering code.
+ GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
+ Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_pd_512 : // Same operand layout as the ps_512 form, with i8 result and fourth operand.
+ GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_ps_256 : // New 256-bit form: four operands, no trailing rounding-mode operand.
+ GCCBuiltin<"__builtin_ia32_cmpps256_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_pd_256 : // New 256-bit form: four operands, no trailing rounding-mode operand.
+ GCCBuiltin<"__builtin_ia32_cmppd256_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_ps_128 : // New 128-bit form: four operands, no trailing rounding-mode operand.
+ GCCBuiltin<"__builtin_ia32_cmpps128_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_pd_128 : // New 128-bit form: four operands, no trailing rounding-mode operand.
+ GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_movntdqa : // Reflowed only; builtin name and signature are unchanged.
+ GCCBuiltin<"__builtin_ia32_movntdqa512">,
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
if (Name == "x86.avx2.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
-
- if (Name == "x86.avx512.mask.cmp.ps.512")
- return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
- NewFn);
- if (Name == "x86.avx512.mask.cmp.pd.512")
- return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
- NewFn);
-
if (Name == "x86.avx512.mask.cmp.b.512")
return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
NewFn);
CI->eraseFromParent();
return;
}
- case Intrinsic::x86_avx512_mask_cmp_ps_512:
- case Intrinsic::x86_avx512_mask_cmp_pd_512: {
- // Need to truncate the last argument from i32 to i8 -- this argument models
- // an inherently 8-bit immediate operand to these x86 instructions.
- SmallVector<Value *, 5> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
-
- // Replace the last argument with a trunc.
- Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc");
-
- CallInst *NewCall = Builder.CreateCall(NewFn, Args);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- return;
- }
}
}
X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
- consumeToken(); // Eat "{"
+ // Eat "{" and mark the current place.
+ const SMLoc consumedToken = consumeToken();
if (Tok.getIdentifier().startswith("r")){
int rndMode = StringSwitch<int>(Tok.getIdentifier())
.Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
MCConstantExpr::Create(rndMode, Parser.getContext());
return X86Operand::CreateImm(RndModeOp, Start, End);
}
+ if(Tok.getIdentifier().equals("sae")){
+ Parser.Lex(); // Eat the sae
+ if (!getLexer().is(AsmToken::RCurly))
+ return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ Parser.Lex(); // Eat "}"
+ return X86Operand::CreateToken("{sae}", consumedToken);
+ }
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
}
/// ParseIntelMemOperand - Parse intel style memory operand.
Mask.getValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
- Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
+ SDValue CC = Op.getOperand(3);
+ CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
+ // Intrinsics that support rounding modes specify two possible opcodes.
+ // First check whether the intrinsic may have a non-default rounding mode
+ // (IntrData->Opc1 != 0), then check the rounding-mode operand itself.
+ if (IntrData->Opc1 != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC, Rnd);
+ }
+ // Default rounding mode, or no rounding variant: use the plain opcode.
+ if(!Cmp.getNode())
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC);
+
} else {
assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2));
+ Op.getOperand(2));
}
SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,
/// integer signed and unsigned data types.
CMPM,
CMPMU,
+ // Vector comparison with rounding mode for FP values
+ CMPM_RND,
// Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
Pattern, itin>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"#
- "$dst {${mask}}"#Round#", "#IntelSrcAsm#"}",
+ OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#Round#"}",
MaskingPattern, itin>, EVEX_K;
}
(and _.KRCWM:$mask, RHS),
Round, itin>;
+multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _, // Thin wrapper over AVX512_maskable_custom_cmp for the "_alt" compare forms.
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm> :
+ AVX512_maskable_custom_cmp<O, F, Outs,
+ Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr, // Masked operand list = $mask prepended to Ins.
+ AttSrcAsm, IntelSrcAsm,
+ [],[],"", NoItinerary>; // Empty lists and empty string: presumably the two patterns and the round string -- confirm against AVX512_maskable_custom_cmp.
+
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-// avx512_cmp_packed - compare packed instructions
-multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
- X86MemOperand x86memop, ValueType vt,
- string suffix, Domain d> {
- def rri : AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
- let hasSideEffects = 0 in
- def rrib: AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
- [], d>, EVEX_B;
- def rmi : AVX512PIi8<0xC2, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [(set KRC:$dst,
- (X86cmpm (vt RC:$src1), (load addr:$src2), imm:$cc))], d>;
+multiclass avx512_vcmp_common<X86VectorVTInfo _> { // Packed FP compare-into-mask forms, instantiated by avx512_vcmp for the 128/256/512-bit infos.
+
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, // Register-register form, selected for X86cmpm.
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, // Second source loaded from a full-vector memory operand.
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)>;
+
+ defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, // Second source is a scalar load broadcast to the vector type (EVEX_B).
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ imm:$cc)>,EVEX_B;
+ }
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
- def rrib_alt: AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"),
- [], d>, EVEX_B;
- let mayLoad = 1 in
- def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
+ defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, // Register form with $cc as a u8imm operand; no pattern argument is passed.
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">;
+
+ let mayLoad = 1 in {
+ defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, // Full-vector memory form with $cc as a u8imm operand.
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">;
+
+ defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, // Broadcast memory form with $cc as a u8imm operand (EVEX_B).
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, ${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
+ }
+ }
+}
+
+multiclass avx512_vcmp_sae<X86VectorVTInfo _> { // Register-register {sae} compare forms; avx512_vcmp instantiates this for the 512-bit info only.
+ // Comparison-code form (VCMP[EQ/LT/LE/...]).
+ defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "{sae}, $src2, $src1", "$src1, $src2,{sae}", // NOTE(review): no space before {sae} in the Intel string, unlike the removed rrib form -- confirm this is intended.
+ (X86cmpmRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+
+ let isAsmParserOnly = 1, hasSideEffects = 0 in { // Assembler-only form taking $cc as an explicit u8imm instead of a comparison code.
+ defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc,{sae}, $src2, $src1", // NOTE(review): operand order and spacing differ from the removed rrib_alt strings -- confirm against the assembler tests.
+ "$src1, $src2,{sae}, $cc">, EVEX_B;
+ }
+}
+
+multiclass avx512_vcmp<AVX512VLVectorVTInfo _> { // Instantiates the packed FP compare forms once per vector width.
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcmp_common<_.info512>, // 512-bit: the common forms plus the {sae} forms.
+ avx512_vcmp_sae<_.info512>, EVEX_V512;
+
+ }
+ let Predicates = [HasAVX512,HasVLX] in { // 128/256-bit: common forms only, additionally gated on HasVLX.
+ defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
+ defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
}
}
-defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
- "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
- "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
- EVEX_CD8<64, CD8VF>;
+defm VCMPPD : avx512_vcmp<avx512vl_f64_info>, // Replaces VCMPPDZ; the Z/Z128/Z256 part of each record name now comes from avx512_vcmp.
+ AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VCMPPS : avx512_vcmp<avx512vl_f32_info>, // Replaces VCMPPSZ; the Z/Z128/Z256 part of each record name now comes from avx512_vcmp.
+ AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VCMPPSZrri
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
-def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR16)>;
-
-def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR8)>;
-
-def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
- FROUND_CURRENT)),
- (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR16)>;
-
-def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
- FROUND_CURRENT)),
- (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR8)>;
-
+//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
+class AVX512PSIi8Base : PS { // Base class listed as a defm parent (see VCMPPS): PS plus the two field settings below.
+ Domain ExeDomain = SSEPackedSingle;
+ ImmType ImmT = Imm8;
+}
+class AVX512PDIi8Base : PD { // Base class listed as a defm parent (see VCMPPD): PD plus the two field settings below.
+ Domain ExeDomain = SSEPackedDouble;
+ ImmType ImmT = Imm8;
+}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
def X86CmpMaskCC :
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVec<1>,
- SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, // Result is a vector of i1 (previously only constrained to an integer vector).
+ SDTCisVec<1>, SDTCisSameAs<2, 1>, // Operands 1 and 2 are vectors of the same type.
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>; // Result has as many elements as operand 1; operand 3 is an i8.
+def X86CmpMaskCCRound : // Same constraints as X86CmpMaskCC, plus a fourth operand.
+ SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
+ SDTCisInt<4>]>; // Operand 4 is an integer; the vcmp {sae} pattern passes (i32 FROUND_NO_EXC) here.
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
-def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
-def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
-def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
+def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; // New node: uses the four-operand X86CmpMaskCCRound profile.
+def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
+def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,
+ X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM,
+ X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
- %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i8 2, i16 -1, i32 8)
+ %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
ret i16 %res
}
- declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i8, i16, i32)
+ declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
- %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i8 4, i8 -1, i32 4)
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
ret i8 %res
}
- declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i8, i8, i32)
+ declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
; cvt intrinsics
define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test1:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
}
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test2:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
}
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test3:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
}
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test4_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test4_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: test5:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test5:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
}
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
-; CHECK-LABEL: test6_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test6_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
}
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: test7:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
-; CHECK-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test7:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
+; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
+; SKX-LABEL: test7:
+; SKX: ## BB#0:
+; SKX: vxorps %xmm2, %xmm2, %xmm2
+; SKX: vcmpltps %xmm2, %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
+
%mask = fcmp olt <4 x float> %a, zeroinitializer
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
ret <4 x float>%c
}
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: test8:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
-; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test8:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
+; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
+; SKX-LABEL: test8:
+; SKX: ## BB#0:
+; SKX: vxorpd %xmm2, %xmm2, %xmm2
+; SKX: vcmpltpd %xmm2, %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
ret <2 x double>%c
}
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: test9:
-; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test9:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; KNL-NEXT: retq
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
}
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
-; CHECK-LABEL: test10:
-; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test10:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; KNL-NEXT: retq
+; SKX-LABEL: test10:
+; SKX: ## BB#0:
+; SKX: vcmpeqps %ymm1, %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
+
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
ret <8 x float> %max
}
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: test11_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test11_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
-; CHECK-LABEL: test12:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
-; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
-; CHECK-NEXT: kunpckbw %k0, %k1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test12:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
+; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
+; KNL-NEXT: kunpckbw %k0, %k1, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
+; KNL-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
}
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
-; CHECK-LABEL: test13:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test13:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
}
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
-; CHECK-LABEL: test14:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test14:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: knotw %k0, %k1
+; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
}
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
-; CHECK-LABEL: test15:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test15:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1
+; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: knotw %k0, %k1
+; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
}
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test16:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1
+; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test17:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test17:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
}
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test18:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test18:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
}
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test19:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test19:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
}
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test20:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test20:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
}
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test21:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test21:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
+; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
+; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
}
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test22:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test22:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
+; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
}
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test23:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test23:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
}
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
-; CHECK-LABEL: test24:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test24:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
}
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test25:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test25:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
}
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test26:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test26:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
}
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test27:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test27:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
ret <8 x i64> %max
}
-; CHECK-LABEL: test28
-; CHECK: vpcmpgtq
-; CHECK: vpcmpgtq
-; CHECK: kxorw
+; KNL-LABEL: test28
+; KNL: vpcmpgtq
+; KNL: vpcmpgtq
+; KNL: kxorw
define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
%x_gt_y = icmp sgt <8 x i64> %x, %y
%x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
ret <8 x i32> %resse
}
-; CHECK-LABEL: test29
-; CHECK: vpcmpgtd
-; CHECK: vpcmpgtd
-; CHECK: kxnorw
+; KNL-LABEL: test29
+; KNL: vpcmpgtd
+; KNL: vpcmpgtd
+; KNL: kxnorw
define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
%x_gt_y = icmp sgt <16 x i32> %x, %y
%x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
%res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
%resse = sext <16 x i1>%res to <16 x i8>
ret <16 x i8> %resse
-}
\ No newline at end of file
+}
+
+; test30: ordered-equal compare (fcmp oeq) of two <4 x double> arguments whose
+; result drives a select between the same two arguments. The SKX check lines
+; expect the compare to produce mask register %k1 via vcmpeqpd on ymm
+; registers, and the select to become a vmovapd masked by %k1.
+define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
+; SKX-LABEL: test30:
+; SKX: vcmpeqpd %ymm1, %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %mask = fcmp oeq <4 x double> %x, %y
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
+ ret <4 x double> %max
+}
+
+; test31: ordered less-than compare of %x against a <2 x double> loaded from
+; %yp. The SKX check lines expect the load to appear as the (%rdi) memory
+; operand of vcmpltpd on xmm registers, followed by a vmovapd masked by %k1.
+define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
+; SKX-LABEL: test31:
+; SKX: vcmpltpd (%rdi), %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+
+ %y = load <2 x double>, <2 x double>* %yp, align 4
+ %mask = fcmp olt <2 x double> %x, %y
+ %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
+ ret <2 x double> %max
+}
+
+; test32: the compare is written as "fcmp ogt %y, %x" with %y loaded from %yp,
+; i.e. the loaded value is the first operand. The SKX check lines expect the
+; less-than form (vcmpltpd) with the load as the (%rdi) memory operand and
+; %ymm0 as the register operand, then a vmovapd masked by %k1.
+define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
+; SKX-LABEL: test32:
+; SKX: vcmpltpd (%rdi), %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %y = load <4 x double>, <4 x double>* %yp, align 4
+ %mask = fcmp ogt <4 x double> %y, %x
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
+ ret <4 x double> %max
+}
+
+; test33: ordered less-than compare of %x against an <8 x double> loaded from
+; %yp. The SKX check lines expect vcmpltpd on zmm registers with the (%rdi)
+; memory operand, followed by a vmovapd masked by %k1.
+define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
+; SKX-LABEL: test33:
+; SKX: vcmpltpd (%rdi), %zmm0, %k1
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+ %y = load <8 x double>, <8 x double>* %yp, align 4
+ %mask = fcmp olt <8 x double> %x, %y
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
+
+; test34: ordered less-than compare of %x against a <4 x float> loaded from
+; %yp. The SKX check lines expect vcmpltps on xmm registers with the (%rdi)
+; memory operand, followed by a vmovaps masked by %k1.
+define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
+; SKX-LABEL: test34:
+; SKX: vcmpltps (%rdi), %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+ %y = load <4 x float>, <4 x float>* %yp, align 4
+ %mask = fcmp olt <4 x float> %x, %y
+ %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
+ ret <4 x float> %max
+}
+
+; test35: the compare is written as "fcmp ogt %y, %x" with %y loaded from %yp.
+; The SKX check lines expect the less-than form (vcmpltps) on ymm registers
+; with the load as the (%rdi) memory operand, then a vmovaps masked by %k1.
+define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
+; SKX-LABEL: test35:
+; SKX: vcmpltps (%rdi), %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+
+ %y = load <8 x float>, <8 x float>* %yp, align 4
+ %mask = fcmp ogt <8 x float> %y, %x
+ %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
+ ret <8 x float> %max
+}
+
+; test36: ordered less-than compare of %x against a <16 x float> loaded from
+; %yp. The SKX check lines expect vcmpltps on zmm registers with the (%rdi)
+; memory operand, followed by a vmovaps masked by %k1.
+define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
+; SKX-LABEL: test36:
+; SKX: vcmpltps (%rdi), %zmm0, %k1
+; SKX: vmovaps %zmm0, %zmm1 {%k1}
+ %y = load <16 x float>, <16 x float>* %yp, align 4
+ %mask = fcmp olt <16 x float> %x, %y
+ %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
+ ret <16 x float> %max
+}
+
+; test37: a scalar double loaded from %ptr is splatted to <8 x double>
+; (insertelement into lane 0, then shufflevector with an all-zero mask) and
+; compared "ogt" against %x. The SKX check lines expect the splat to appear as
+; the (%rdi){1to8} broadcast memory operand of vcmpltpd on zmm registers.
+define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test37:
+; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <8 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
+
+ %mask = fcmp ogt <8 x double> %shuffle, %x
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
+
+; test38: a scalar double loaded from %ptr is splatted to <4 x double> and
+; compared "ogt" against %x. The SKX check lines expect the splat to appear as
+; the (%rdi){1to4} broadcast memory operand of vcmpltpd on ymm registers.
+define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test38:
+; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <4 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
+
+ %mask = fcmp ogt <4 x double> %shuffle, %x
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
+ ret <4 x double> %max
+}
+
+; test39: a scalar double loaded from %ptr is splatted to <2 x double> (the
+; shuffle mask is written out as <i32 0, i32 0>) and compared "ogt" against
+; %x. The SKX check lines expect the splat to appear as the (%rdi){1to2}
+; broadcast memory operand of vcmpltpd on xmm registers.
+define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test39:
+; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+
+ %mask = fcmp ogt <2 x double> %shuffle, %x
+ %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
+ ret <2 x double> %max
+}
+
+
+; test40: a scalar float loaded from %ptr is splatted to <16 x float> (shuffle
+; mask of sixteen explicit zero indices) and compared "ogt" against %x. The
+; SKX check lines expect the splat to appear as the (%rdi){1to16} broadcast
+; memory operand of vcmpltps on zmm registers.
+define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test40:
+; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
+; SKX: vmovaps %zmm0, %zmm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <16 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <16 x float> %shuffle, %x
+ %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
+ ret <16 x float> %max
+}
+
+; test41: a scalar float loaded from %ptr is splatted to <8 x float> and
+; compared "ogt" against %x. The SKX check lines expect the splat to appear as
+; the (%rdi){1to8} broadcast memory operand of vcmpltps on ymm registers.
+define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test41:
+; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <8 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <8 x float> %shuffle, %x
+ %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
+ ret <8 x float> %max
+}
+
+; test42: a scalar float loaded from %ptr is splatted to <4 x float> and
+; compared "ogt" against %x. The SKX check lines expect the splat to appear as
+; the (%rdi){1to4} broadcast memory operand of vcmpltps on xmm registers.
+define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test42:
+; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <4 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <4 x float> %shuffle, %x
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x float> %x1
+ ret <4 x float> %max
+}
+
+; test43: like the broadcast compare in test37, but the compare result is
+; ANDed with an incoming <8 x i1> argument (%mask_in) before the select. The
+; SKX check lines expect %mask_in to be moved into %k1 with vpmovw2m and then
+; used as the write-mask ({%k1}) of the broadcast vcmpltpd itself.
+define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
+; SKX-LABEL: test43:
+; SKX: vpmovw2m %xmm2, %k1
+; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <8 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
+
+ %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
+ %mask = and <8 x i1> %mask_cmp, %mask_in
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
}
declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+; Calls the 256-bit packed-single compare intrinsic with immediate 2 and an
+; all-ones mask argument (i8 -1). The expected output is vcmpleps on ymm
+; registers writing %k0, with 0x02 as the last encoding byte.
+define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
+ ;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
+
+; Calls the 128-bit packed-single compare intrinsic with immediate 2 and an
+; all-ones mask argument (i8 -1). The expected output is vcmpleps on xmm
+; registers writing %k0, with 0x02 as the last encoding byte.
+define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
+ ;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
+
+; Calls the 256-bit packed-double compare intrinsic with immediate 2 and an
+; all-ones mask argument (i8 -1). The expected output is vcmplepd on ymm
+; registers writing %k0, with 0x02 as the last encoding byte.
+define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
+ ;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
+
+; Calls the 128-bit packed-double compare intrinsic with immediate 2 and an
+; all-ones mask argument (i8 -1). The expected output is vcmplepd on xmm
+; registers writing %k0, with 0x02 as the last encoding byte.
+define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
+ ;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
# CHECK: vpcmpd $8, %zmm10, %zmm25, %k5
0x62 0xd3 0x35 0x40 0x1f 0xea 0x8
-# CHECK: vcmppd {sae}, $127, %zmm27, %zmm11, %k4
+# CHECK: vcmppd $127,{sae}, %zmm27, %zmm11, %k4
0x62 0x91 0xa5 0x58 0xc2 0xe3 0x7f
// CHECK: vpermilpd
// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x05,0x53,0x10,0x23]
vpermilpd $0x23, 0x400(%rbx), %zmm2
+
+// vcmppd with zmm operands, AT&T syntax. Covers register forms (plain, with
+// write-mask {%k3}, and with {sae}), full-width memory operands, and {1to8}
+// broadcast memory operands. The displacement pairs 8128/8192, -8192/-8256,
+// 1016/1024 and -1024/-1032 sit on either side of the point where the
+// expected encoding switches from one displacement byte (0x7f / 0x80) to a
+// four-byte displacement.
+// CHECK: vcmppd $171, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab]
+ vcmppd $0xab, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $171, %zmm26, %zmm12, %k2 {%k3}
+// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
+ vcmppd $0xab, %zmm26, %zmm12, %k2 {%k3}
+
+// CHECK: vcmppd $171,{sae}, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
+ vcmppd $0xab,{sae}, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $123, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
+ vcmppd $0x7b, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $123,{sae}, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
+ vcmppd $0x7b,{sae}, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $123, (%rcx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b]
+ vcmppd $0x7b, (%rcx), %zmm12, %k2
+
+// CHECK: vcmppd $123, 291(%rax,%r14,8), %zmm12, %k2
+// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd $0x7b, 291(%rax,%r14,8), %zmm12, %k2
+
+// CHECK: vcmppd $123, (%rcx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b]
+ vcmppd $0x7b, (%rcx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, 8128(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b]
+ vcmppd $0x7b, 8128(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, 8192(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmppd $0x7b, 8192(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, -8192(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b]
+ vcmppd $0x7b, -8192(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, -8256(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmppd $0x7b, -8256(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, 1016(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b]
+ vcmppd $0x7b, 1016(%rdx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, 1024(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd $0x7b, 1024(%rdx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, -1024(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b]
+ vcmppd $0x7b, -1024(%rdx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, -1032(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd $0x7b, -1032(%rdx){1to8}, %zmm12, %k2
+
+// vcmpps with zmm operands, AT&T syntax. Covers register forms (plain, with
+// write-mask {%k3}, and with {sae}), full-width memory operands, and {1to16}
+// broadcast memory operands. The displacement pairs 8128/8192, -8192/-8256,
+// 508/512 and -512/-516 sit on either side of the point where the expected
+// encoding switches from one displacement byte (0x7f / 0x80) to a four-byte
+// displacement.
+// CHECK: vcmpps $171, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab]
+ vcmpps $0xab, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $171, %zmm22, %zmm17, %k2 {%k3}
+// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
+ vcmpps $0xab, %zmm22, %zmm17, %k2 {%k3}
+
+// CHECK: vcmpps $171,{sae}, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
+ vcmpps $0xab,{sae}, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $123, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
+ vcmpps $0x7b, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $123,{sae}, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
+ vcmpps $0x7b,{sae}, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $123, (%rcx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b]
+ vcmpps $0x7b, (%rcx), %zmm17, %k2
+
+// CHECK: vcmpps $123, 291(%rax,%r14,8), %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps $0x7b, 291(%rax,%r14,8), %zmm17, %k2
+
+// CHECK: vcmpps $123, (%rcx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b]
+ vcmpps $0x7b, (%rcx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, 8128(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b]
+ vcmpps $0x7b, 8128(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, 8192(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmpps $0x7b, 8192(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, -8192(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b]
+ vcmpps $0x7b, -8192(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, -8256(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmpps $0x7b, -8256(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, 508(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b]
+ vcmpps $0x7b, 508(%rdx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, 512(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps $0x7b, 512(%rdx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, -512(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b]
+ vcmpps $0x7b, -512(%rdx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, -516(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps $0x7b, -516(%rdx){1to16}, %zmm17, %k2
+
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -mcpu=knl --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 -mcpu=knl --show-encoding %s | FileCheck %s
+// The expected instruction text below is in Intel operand order (destination
+// first, write-mask attached to the destination, rounding control last) and
+// so mirrors the Intel-syntax input lines. NOTE(review): presumably this is
+// the effect of -output-asm-variant=1 on the RUN line -- confirm against the
+// llvm-mc documentation. The expected encodings are unchanged.
-// CHECK: vaddps (%rax), %zmm1, %zmm1
+// CHECK: vaddps zmm1 , zmm1, zmmword ptr [rax]
// CHECK: encoding: [0x62,0xf1,0x74,0x48,0x58,0x08]
vaddps zmm1, zmm1, zmmword ptr [rax]
-// CHECK: vaddpd %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
vaddpd zmm1,zmm1,zmm2
-// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5}
+// CHECK: vaddpd zmm1 {k5}, zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0x4d,0x58,0xca]
vaddpd zmm1{k5},zmm1,zmm2
-// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5} {z}
+// CHECK: vaddpd zmm1 {k5} {z}, zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0xcd,0x58,0xca]
vaddpd zmm1{k5} {z},zmm1,zmm2
-// CHECK: vaddpd {rn-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {rn-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x18,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rn-sae}
-// CHECK: vaddpd {ru-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {ru-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x58,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{ru-sae}
-// CHECK: vaddpd {rd-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {rd-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x38,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rd-sae}
-// CHECK: vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {rz-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x78,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rz-sae}
+// vcmppd with zmm operands, Intel syntax. Covers register forms (plain, with
+// write-mask {k3}, and with {sae}), zmmword memory operands, and qword
+// {1to8} broadcast memory operands. The input lines write displacements in
+// hex and the expected text shows them in decimal; the pairs 8128/8192,
+// -8192/-8256, 1016/1024 and -1024/-1032 sit on either side of the switch
+// from a one-byte to a four-byte displacement in the expected encoding.
+// CHECK: vcmppd k2 , zmm12, zmm26, 171
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab]
+ vcmppd k2,zmm12,zmm26,0xab
+
+// CHECK: vcmppd k2 {k3}, zmm12, zmm26, 171
+// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
+ vcmppd k2{k3},zmm12,zmm26,0xab
+
+// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 171
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
+ vcmppd k2,zmm12,zmm26,{sae},0xab
+
+// CHECK: vcmppd k2 , zmm12, zmm26, 123
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
+ vcmppd k2 ,zmm12,zmm26,0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 123
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
+ vcmppd k2,zmm12,zmm26,{sae},0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rcx],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd k2 ,zmm12,zmmword PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rcx]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rcx]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8128], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx+0x1fc0],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx+0x2000],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx-0x2000],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8256], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx-0x2040],0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1016]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx+0x3f8]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1024]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx+0x400]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1024]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx-0x400]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1032]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx-0x408]{1to8},0x7b
+
+// vcmpps with zmm operands, Intel syntax. Covers register forms (plain, with
+// write-mask {k3}, and with {sae}), zmmword memory operands, and dword
+// {1to16} broadcast memory operands. The displacement pairs 8128/8192,
+// -8192/-8256, 508/512 and -512/-516 sit on either side of the switch from a
+// one-byte to a four-byte displacement in the expected encoding.
+// CHECK: vcmpps k2 , zmm17, zmm22, 171
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab]
+ vcmpps k2,zmm17,zmm22,0xab
+
+// CHECK: vcmpps k2 {k3}, zmm17, zmm22, 171
+// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
+ vcmpps k2{k3},zmm17,zmm22,0xab
+
+// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 171
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
+ vcmpps k2,zmm17,zmm22,{sae},0xab
+
+// CHECK: vcmpps k2 , zmm17, zmm22, 123
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
+ vcmpps k2,zmm17,zmm22,0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 123
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
+ vcmpps k2,zmm17,zmm22,{sae},0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rcx],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rcx]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rcx]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8128], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx+0x1fc0],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx+0x2000],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx-0x2000],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8256], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx-0x2040],0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 508]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx+0x1fc]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 512]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx+0x200]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 512]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx-0x200]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 516]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx-0x204]{1to16},0x7b
+
--- /dev/null
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// vcmppd with xmm operands, Intel syntax. Covers register forms (plain and
+// with write-mask {k5}), xmmword memory operands, and qword {1to2} broadcast
+// memory operands. The displacement pairs 2032/2048, -2048/-2064, 1016/1024
+// and -1024/-1032 sit on either side of the switch from a one-byte to a
+// four-byte displacement in the expected encoding.
+// CHECK: vcmppd k3 , xmm27, xmm23, 171
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab]
+ vcmppd k3,xmm27,xmm23,0xab
+
+// CHECK: vcmppd k3 {k5}, xmm27, xmm23, 171
+// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab]
+ vcmppd k3{k5},xmm27,xmm23,0xab
+
+// CHECK: vcmppd k3 , xmm27, xmm23, 123
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b]
+ vcmppd k3,xmm27,xmm23,0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rcx]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rcx]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2032], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx+0x7f0],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2048], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx+0x800],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2048], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx-0x800],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2064], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx-0x810],0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1016]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx+0x3f8]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1024]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx+0x400]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1024]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx-0x400]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1032]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx-0x408]{1to2},0x7b
+
+// vcmppd with ymm operands, Intel syntax. Covers register forms (plain and
+// with write-mask {k7}), ymmword memory operands, and qword {1to4} broadcast
+// memory operands. The displacement pairs 4064/4096, -4096/-4128, 1016/1024
+// and -1024/-1032 sit on either side of the switch from a one-byte to a
+// four-byte displacement in the expected encoding.
+// CHECK: vcmppd k4 , ymm17, ymm27, 171
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab]
+ vcmppd k4,ymm17,ymm27,0xab
+
+// CHECK: vcmppd k4 {k7}, ymm17, ymm27, 171
+// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab]
+ vcmppd k4{k7},ymm17,ymm27,0xab
+
+// CHECK: vcmppd k4 , ymm17, ymm27, 123
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b]
+ vcmppd k4,ymm17,ymm27,0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rcx]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rcx]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4064], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx+0xfe0],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4096], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx+0x1000],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4096], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1000],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4128], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1020],0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1016]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx+0x3f8]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1024]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx+0x400]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1024]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx-0x400]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1032]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx-0x408]{1to4},0x7b
+
+// vcmpps with xmm operands, Intel syntax. Covers register forms (plain and
+// with write-mask {k2}), xmmword memory operands, and dword {1to4} broadcast
+// memory operands. The displacement pairs 2032/2048, -2048/-2064, 508/512
+// and -512/-516 sit on either side of the switch from a one-byte to a
+// four-byte displacement in the expected encoding.
+// CHECK: vcmpps k4 , xmm29, xmm28, 171
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab]
+ vcmpps k4,xmm29,xmm28,0xab
+
+// CHECK: vcmpps k4 {k2}, xmm29, xmm28, 171
+// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab]
+ vcmpps k4{k2},xmm29,xmm28,0xab
+
+// CHECK: vcmpps k4 , xmm29, xmm28, 123
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b]
+ vcmpps k4,xmm29,xmm28,0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rcx]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rcx]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2032], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx+0x7f0],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2048], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx+0x800],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2048], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx-0x800],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2064], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx-0x810],0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 508]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx+0x1fc]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 512]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx+0x200]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 512]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx-0x200]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 516]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx-0x204]{1to4},0x7b
+
+// vcmpps with ymm operands, Intel syntax. Covers register forms (plain and
+// with write-mask {k1}), ymmword memory operands, and dword {1to8} broadcast
+// memory operands. The displacement pairs 4064/4096, -4096/-4128, 508/512
+// and -512/-516 sit on either side of the switch from a one-byte to a
+// four-byte displacement in the expected encoding.
+// CHECK: vcmpps k4 , ymm19, ymm18, 171
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab]
+ vcmpps k4,ymm19,ymm18,0xab
+
+// CHECK: vcmpps k4 {k1}, ymm19, ymm18, 171
+// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab]
+ vcmpps k4{k1},ymm19,ymm18,0xab
+
+// CHECK: vcmpps k4 , ymm19, ymm18, 123
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b]
+ vcmpps k4,ymm19,ymm18,0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rcx]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rcx]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4064], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx+0xfe0],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4096], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx+0x1000],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4096], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1000],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4128], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1020],0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 508]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx+0x1fc]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 512]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx+0x200]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 512]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx-0x200]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 516]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx-0x204]{1to8},0x7b
// CHECK: vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0xf8,0xfb,0xff,0xff]
vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
+
+// CHECK: vcmppd $171, %xmm23, %xmm27, %k3
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab]
+ vcmppd $0xab, %xmm23, %xmm27, %k3
+
+// CHECK: vcmppd $171, %xmm23, %xmm27, %k3 {%k5}
+// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab]
+ vcmppd $0xab, %xmm23, %xmm27, %k3 {%k5}
+
+// CHECK: vcmppd $123, %xmm23, %xmm27, %k3
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b]
+ vcmppd $0x7b, %xmm23, %xmm27, %k3
+
+// CHECK: vcmppd $123, (%rcx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b]
+ vcmppd $0x7b, (%rcx), %xmm27, %k3
+
+// CHECK: vcmppd $123, 291(%rax,%r14,8), %xmm27, %k3
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd $0x7b, 291(%rax,%r14,8), %xmm27, %k3
+
+// CHECK: vcmppd $123, (%rcx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b]
+ vcmppd $0x7b, (%rcx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, 2032(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b]
+ vcmppd $0x7b, 2032(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, 2048(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vcmppd $0x7b, 2048(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, -2048(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b]
+ vcmppd $0x7b, -2048(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, -2064(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmppd $0x7b, -2064(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, 1016(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b]
+ vcmppd $0x7b, 1016(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, 1024(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd $0x7b, 1024(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, -1024(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b]
+ vcmppd $0x7b, -1024(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, -1032(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd $0x7b, -1032(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $171, %ymm27, %ymm17, %k4
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab]
+ vcmppd $0xab, %ymm27, %ymm17, %k4
+
+// CHECK: vcmppd $171, %ymm27, %ymm17, %k4 {%k7}
+// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab]
+ vcmppd $0xab, %ymm27, %ymm17, %k4 {%k7}
+
+// CHECK: vcmppd $123, %ymm27, %ymm17, %k4
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b]
+ vcmppd $0x7b, %ymm27, %ymm17, %k4
+
+// CHECK: vcmppd $123, (%rcx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b]
+ vcmppd $0x7b, (%rcx), %ymm17, %k4
+
+// CHECK: vcmppd $123, 291(%rax,%r14,8), %ymm17, %k4
+// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd $0x7b, 291(%rax,%r14,8), %ymm17, %k4
+
+// CHECK: vcmppd $123, (%rcx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b]
+ vcmppd $0x7b, (%rcx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, 4064(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmppd $0x7b, 4064(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, 4096(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmppd $0x7b, 4096(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, -4096(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b]
+ vcmppd $0x7b, -4096(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, -4128(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmppd $0x7b, -4128(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, 1016(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmppd $0x7b, 1016(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, 1024(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd $0x7b, 1024(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, -1024(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b]
+ vcmppd $0x7b, -1024(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, -1032(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd $0x7b, -1032(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmpps $171, %xmm28, %xmm29, %k4
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab]
+ vcmpps $0xab, %xmm28, %xmm29, %k4
+
+// CHECK: vcmpps $171, %xmm28, %xmm29, %k4 {%k2}
+// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab]
+ vcmpps $0xab, %xmm28, %xmm29, %k4 {%k2}
+
+// CHECK: vcmpps $123, %xmm28, %xmm29, %k4
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b]
+ vcmpps $0x7b, %xmm28, %xmm29, %k4
+
+// CHECK: vcmpps $123, (%rcx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx), %xmm29, %k4
+
+// CHECK: vcmpps $123, 291(%rax,%r14,8), %xmm29, %k4
+// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps $0x7b, 291(%rax,%r14,8), %xmm29, %k4
+
+// CHECK: vcmpps $123, (%rcx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, 2032(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b]
+ vcmpps $0x7b, 2032(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, 2048(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vcmpps $0x7b, 2048(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, -2048(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -2048(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, -2064(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmpps $0x7b, -2064(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, 508(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b]
+ vcmpps $0x7b, 508(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, 512(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps $0x7b, 512(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, -512(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -512(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, -516(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps $0x7b, -516(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $171, %ymm18, %ymm19, %k4
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab]
+ vcmpps $0xab, %ymm18, %ymm19, %k4
+
+// CHECK: vcmpps $171, %ymm18, %ymm19, %k4 {%k1}
+// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab]
+ vcmpps $0xab, %ymm18, %ymm19, %k4 {%k1}
+
+// CHECK: vcmpps $123, %ymm18, %ymm19, %k4
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b]
+ vcmpps $0x7b, %ymm18, %ymm19, %k4
+
+// CHECK: vcmpps $123, (%rcx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx), %ymm19, %k4
+
+// CHECK: vcmpps $123, 291(%rax,%r14,8), %ymm19, %k4
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps $0x7b, 291(%rax,%r14,8), %ymm19, %k4
+
+// CHECK: vcmpps $123, (%rcx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, 4064(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmpps $0x7b, 4064(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, 4096(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmpps $0x7b, 4096(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, -4096(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -4096(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, -4128(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmpps $0x7b, -4128(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, 508(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmpps $0x7b, 508(%rdx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, 512(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps $0x7b, 512(%rdx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, -512(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -512(%rdx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, -516(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps $0x7b, -516(%rdx){1to8}, %ymm19, %k4
+
+
+
+
+
+
+
+
+
+