Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
- def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
+ def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_ps_128 : GCCBuiltin<"__builtin_ia32_sqrtps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_pd_256 : GCCBuiltin<"__builtin_ia32_getexppd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
+ def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_ps_256 : GCCBuiltin<"__builtin_ia32_getexpps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
Op.getOperand(2), Op.getOperand(3));
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
- SDValue Src0 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
- SDValue RoundingMode = Op.getOperand(4);
+ SDValue RoundingMode;
+ if (Op.getNumOperands() == 4)
+ RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ else
+ RoundingMode = Op.getOperand(4);
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(), Src, RoundingMode),
+ Mask, PassThru, Subtarget, DAG);
+ }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
- Mask, Src0, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_1OP_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Passthru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
+ Mask, Passthru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
+ case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
+ case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
}
/// Combined add and sub on an FP vector.
ADDSUB,
+
// FP vector ops with rounding mode.
FADD_RND,
FSUB_RND,
FDIV_RND,
FMAX_RND,
FMIN_RND,
+ FSQRT_RND,
+
+ // FP vector get exponent
+ FGETEXP_RND,
// Integer add/sub with unsigned saturation.
ADDUS,
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
- defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src), OpcodeStr,
- "{sae}, $src", "$src, {sae}",
- (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
-
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(i32 FROUND_CURRENT))>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (ins _.MemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B;
}
+multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode> {
+ defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr,
+ "{sae}, $src", "$src, {sae}",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
+}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
- EVEX_CD8<32, CD8VF>;
+ avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
+ T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
- VEX_W, EVEX_CD8<32, CD8VF>;
+ avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
+ T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
+multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
+ EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
+ EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
+ EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
+ EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ }
+}
let Predicates = [HasERI], hasSideEffects = 0 in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD;
- defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD;
- defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD;
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
+ defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
+ defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
+}
+defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
+ avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
+
+multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
+ SDNode OpNodeRnd, X86VectorVTInfo _>{
+ defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
+ (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
+ EVEX, EVEX_B, EVEX_RC;
}
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
}
}
-defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
+multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
+ SDNode OpNodeRnd> {
+ defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
+ v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
+ v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+}
+
+defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
+ avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
SSE_SQRTSS, SSE_SQRTSD>;
let Predicates = [HasAVX512] in {
- def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
- (VSQRTPSZr VR512:$src1)>;
- def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
- (VSQRTPDZr VR512:$src1)>;
-
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (fsqrt (load addr:$src))),
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
+def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc.
+ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]>;
+
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
+def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
EXPAND_FROM_MEM, BLEND
};
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
- X86ISD::FDIV_RND),
+ X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
- X86ISD::FDIV_RND),
+ X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
+ X86ISD::FSQRT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
+ X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
+ ; CHECK-LABEL: test_sqrt_pd_512
; CHECK: vsqrtpd
- %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
ret <8 x double> %res
}
-declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_sqrt_ps_512
; CHECK: vsqrtps
- %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
-declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
+define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_sqrt_round_ps_512
+ ; CHECK: vsqrtps {rz-sae}
+ %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
+ ; CHECK-LABEL: test_getexp_pd_512
+ ; CHECK: vgetexppd
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
+ ret <8 x double> %res
+}
+define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
+ ; CHECK-LABEL: test_getexp_round_pd_512
+ ; CHECK: vgetexppd {sae}
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_getexp_ps_512
+ ; CHECK: vgetexpps
+ %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_getexp_round_ps_512
+ ; CHECK: vgetexpps {sae}
+ %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vsqrtss {{.*}}encoding: [0x62
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
+ ; CHECK-LABEL: test_sqrt_pd_256
+ ; CHECK: vsqrtpd
+ %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
+ ; CHECK-LABEL: test_sqrt_ps_256
+ ; CHECK: vsqrtps
+ %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
+ ; CHECK-LABEL: test_getexp_pd_256
+ ; CHECK: vgetexppd
+ %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
+ ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
+ ; CHECK-LABEL: test_getexp_ps_256
+ ; CHECK: vgetexpps
+ %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
\ No newline at end of file
// CHECK: encoding: [0x62,0x61,0x7c,0x58,0x51,0xa2,0xfc,0xfd,0xff,0xff]
vsqrtps -516(%rdx){1to16}, %zmm28
+// CHECK: vsqrtpd {rn-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x18,0x51,0xdb]
+ vsqrtpd {rn-sae}, %zmm19, %zmm19
+
+// CHECK: vsqrtpd {ru-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x58,0x51,0xdb]
+ vsqrtpd {ru-sae}, %zmm19, %zmm19
+
+// CHECK: vsqrtpd {rd-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x38,0x51,0xdb]
+ vsqrtpd {rd-sae}, %zmm19, %zmm19
+
+// CHECK: vsqrtpd {rz-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x78,0x51,0xdb]
+ vsqrtpd {rz-sae}, %zmm19, %zmm19
+
+// CHECK: vsqrtps {rn-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x18,0x51,0xe5]
+ vsqrtps {rn-sae}, %zmm29, %zmm28
+
+// CHECK: vsqrtps {ru-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x58,0x51,0xe5]
+ vsqrtps {ru-sae}, %zmm29, %zmm28
+
+// CHECK: vsqrtps {rd-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x38,0x51,0xe5]
+ vsqrtps {rd-sae}, %zmm29, %zmm28
+
+// CHECK: vsqrtps {rz-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x78,0x51,0xe5]
+ vsqrtps {rz-sae}, %zmm29, %zmm28
+
// CHECK: vsubpd %zmm9, %zmm12, %zmm9
// CHECK: encoding: [0x62,0x51,0x9d,0x48,0x5c,0xc9]
vsubpd %zmm9, %zmm12, %zmm9
// CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
vpshufd $123, -516(%rdx){1to16}, %zmm19
+// CHECK: vgetexppd %zmm25, %zmm14
+// CHECK: encoding: [0x62,0x12,0xfd,0x48,0x42,0xf1]
+ vgetexppd %zmm25, %zmm14
+
+// CHECK: vgetexppd %zmm25, %zmm14 {%k5}
+// CHECK: encoding: [0x62,0x12,0xfd,0x4d,0x42,0xf1]
+ vgetexppd %zmm25, %zmm14 {%k5}
+
+// CHECK: vgetexppd %zmm25, %zmm14 {%k5} {z}
+// CHECK: encoding: [0x62,0x12,0xfd,0xcd,0x42,0xf1]
+ vgetexppd %zmm25, %zmm14 {%k5} {z}
+
+// CHECK: vgetexppd {sae}, %zmm25, %zmm14
+// CHECK: encoding: [0x62,0x12,0xfd,0x18,0x42,0xf1]
+ vgetexppd {sae}, %zmm25, %zmm14
+
+// CHECK: vgetexppd (%rcx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x31]
+ vgetexppd (%rcx), %zmm14
+
+// CHECK: vgetexppd 291(%rax,%r14,8), %zmm14
+// CHECK: encoding: [0x62,0x32,0xfd,0x48,0x42,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vgetexppd 291(%rax,%r14,8), %zmm14
+
+// CHECK: vgetexppd (%rcx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x31]
+ vgetexppd (%rcx){1to8}, %zmm14
+
+// CHECK: vgetexppd 8128(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x72,0x7f]
+ vgetexppd 8128(%rdx), %zmm14
+
+// CHECK: vgetexppd 8192(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0xb2,0x00,0x20,0x00,0x00]
+ vgetexppd 8192(%rdx), %zmm14
+
+// CHECK: vgetexppd -8192(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x72,0x80]
+ vgetexppd -8192(%rdx), %zmm14
+
+// CHECK: vgetexppd -8256(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0xb2,0xc0,0xdf,0xff,0xff]
+ vgetexppd -8256(%rdx), %zmm14
+
+// CHECK: vgetexppd 1016(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x72,0x7f]
+ vgetexppd 1016(%rdx){1to8}, %zmm14
+
+// CHECK: vgetexppd 1024(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0xb2,0x00,0x04,0x00,0x00]
+ vgetexppd 1024(%rdx){1to8}, %zmm14
+
+// CHECK: vgetexppd -1024(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x72,0x80]
+ vgetexppd -1024(%rdx){1to8}, %zmm14
+
+// CHECK: vgetexppd -1032(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0xb2,0xf8,0xfb,0xff,0xff]
+ vgetexppd -1032(%rdx){1to8}, %zmm14
+
+// CHECK: vgetexpps %zmm6, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0xce]
+ vgetexpps %zmm6, %zmm1
+
+// CHECK: vgetexpps %zmm6, %zmm1 {%k3}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4b,0x42,0xce]
+ vgetexpps %zmm6, %zmm1 {%k3}
+
+// CHECK: vgetexpps %zmm6, %zmm1 {%k3} {z}
+// CHECK: encoding: [0x62,0xf2,0x7d,0xcb,0x42,0xce]
+ vgetexpps %zmm6, %zmm1 {%k3} {z}
+
+// CHECK: vgetexpps {sae}, %zmm6, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x18,0x42,0xce]
+ vgetexpps {sae}, %zmm6, %zmm1
+
+// CHECK: vgetexpps (%rcx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x09]
+ vgetexpps (%rcx), %zmm1
+
+// CHECK: vgetexpps 291(%rax,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpps 291(%rax,%r14,8), %zmm1
+
+// CHECK: vgetexpps (%rcx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x09]
+ vgetexpps (%rcx){1to16}, %zmm1
+
+// CHECK: vgetexpps 8128(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x4a,0x7f]
+ vgetexpps 8128(%rdx), %zmm1
+
+// CHECK: vgetexpps 8192(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x8a,0x00,0x20,0x00,0x00]
+ vgetexpps 8192(%rdx), %zmm1
+
+// CHECK: vgetexpps -8192(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x4a,0x80]
+ vgetexpps -8192(%rdx), %zmm1
+
+// CHECK: vgetexpps -8256(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x8a,0xc0,0xdf,0xff,0xff]
+ vgetexpps -8256(%rdx), %zmm1
+
+// CHECK: vgetexpps 508(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x4a,0x7f]
+ vgetexpps 508(%rdx){1to16}, %zmm1
+
+// CHECK: vgetexpps 512(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x8a,0x00,0x02,0x00,0x00]
+ vgetexpps 512(%rdx){1to16}, %zmm1
+
+// CHECK: vgetexpps -512(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x4a,0x80]
+ vgetexpps -512(%rdx){1to16}, %zmm1
+
+// CHECK: vgetexpps -516(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x8a,0xfc,0xfd,0xff,0xff]
+ vgetexpps -516(%rdx){1to16}, %zmm1
+
// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6
// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0xab]
vshuff32x4 $171, %zmm3, %zmm24, %zmm6
// CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
vpshufd $123, -516(%rdx){1to8}, %ymm20
+// CHECK: vgetexppd %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x42,0xca]
+ vgetexppd %xmm18, %xmm17
+
+// CHECK: vgetexppd %xmm18, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x09,0x42,0xca]
+ vgetexppd %xmm18, %xmm17 {%k1}
+
+// CHECK: vgetexppd %xmm18, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x89,0x42,0xca]
+ vgetexppd %xmm18, %xmm17 {%k1} {z}
+
+// CHECK: vgetexppd (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x09]
+ vgetexppd (%rcx), %xmm17
+
+// CHECK: vgetexppd 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vgetexppd 291(%rax,%r14,8), %xmm17
+
+// CHECK: vgetexppd (%rcx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x09]
+ vgetexppd (%rcx){1to2}, %xmm17
+
+// CHECK: vgetexppd 2032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x4a,0x7f]
+ vgetexppd 2032(%rdx), %xmm17
+
+// CHECK: vgetexppd 2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x8a,0x00,0x08,0x00,0x00]
+ vgetexppd 2048(%rdx), %xmm17
+
+// CHECK: vgetexppd -2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x4a,0x80]
+ vgetexppd -2048(%rdx), %xmm17
+
+// CHECK: vgetexppd -2064(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x8a,0xf0,0xf7,0xff,0xff]
+ vgetexppd -2064(%rdx), %xmm17
+
+// CHECK: vgetexppd 1016(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x4a,0x7f]
+ vgetexppd 1016(%rdx){1to2}, %xmm17
+
+// CHECK: vgetexppd 1024(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x8a,0x00,0x04,0x00,0x00]
+ vgetexppd 1024(%rdx){1to2}, %xmm17
+
+// CHECK: vgetexppd -1024(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x4a,0x80]
+ vgetexppd -1024(%rdx){1to2}, %xmm17
+
+// CHECK: vgetexppd -1032(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x8a,0xf8,0xfb,0xff,0xff]
+ vgetexppd -1032(%rdx){1to2}, %xmm17
+
+// CHECK: vgetexppd %ymm17, %ymm20
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x42,0xe1]
+ vgetexppd %ymm17, %ymm20
+
+// CHECK: vgetexppd %ymm17, %ymm20 {%k3}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2b,0x42,0xe1]
+ vgetexppd %ymm17, %ymm20 {%k3}
+
+// CHECK: vgetexppd %ymm17, %ymm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xab,0x42,0xe1]
+ vgetexppd %ymm17, %ymm20 {%k3} {z}
+
+// CHECK: vgetexppd (%rcx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x21]
+ vgetexppd (%rcx), %ymm20
+
+// CHECK: vgetexppd 291(%rax,%r14,8), %ymm20
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x42,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vgetexppd 291(%rax,%r14,8), %ymm20
+
+// CHECK: vgetexppd (%rcx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x21]
+ vgetexppd (%rcx){1to4}, %ymm20
+
+// CHECK: vgetexppd 4064(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x62,0x7f]
+ vgetexppd 4064(%rdx), %ymm20
+
+// CHECK: vgetexppd 4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0xa2,0x00,0x10,0x00,0x00]
+ vgetexppd 4096(%rdx), %ymm20
+
+// CHECK: vgetexppd -4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x62,0x80]
+ vgetexppd -4096(%rdx), %ymm20
+
+// CHECK: vgetexppd -4128(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0xa2,0xe0,0xef,0xff,0xff]
+ vgetexppd -4128(%rdx), %ymm20
+
+// CHECK: vgetexppd 1016(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x62,0x7f]
+ vgetexppd 1016(%rdx){1to4}, %ymm20
+
+// CHECK: vgetexppd 1024(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0xa2,0x00,0x04,0x00,0x00]
+ vgetexppd 1024(%rdx){1to4}, %ymm20
+
+// CHECK: vgetexppd -1024(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x62,0x80]
+ vgetexppd -1024(%rdx){1to4}, %ymm20
+
+// CHECK: vgetexppd -1032(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0xa2,0xf8,0xfb,0xff,0xff]
+ vgetexppd -1032(%rdx){1to4}, %ymm20
+
+// CHECK: vgetexpps %xmm27, %xmm17
+// CHECK: encoding: [0x62,0x82,0x7d,0x08,0x42,0xcb]
+ vgetexpps %xmm27, %xmm17
+
+// CHECK: vgetexpps %xmm27, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0x82,0x7d,0x0a,0x42,0xcb]
+ vgetexpps %xmm27, %xmm17 {%k2}
+
+// CHECK: vgetexpps %xmm27, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0x8a,0x42,0xcb]
+ vgetexpps %xmm27, %xmm17 {%k2} {z}
+
+// CHECK: vgetexpps (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x09]
+ vgetexpps (%rcx), %xmm17
+
+// CHECK: vgetexpps 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpps 291(%rax,%r14,8), %xmm17
+
+// CHECK: vgetexpps (%rcx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x09]
+ vgetexpps (%rcx){1to4}, %xmm17
+
+// CHECK: vgetexpps 2032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x4a,0x7f]
+ vgetexpps 2032(%rdx), %xmm17
+
+// CHECK: vgetexpps 2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x8a,0x00,0x08,0x00,0x00]
+ vgetexpps 2048(%rdx), %xmm17
+
+// CHECK: vgetexpps -2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x4a,0x80]
+ vgetexpps -2048(%rdx), %xmm17
+
+// CHECK: vgetexpps -2064(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x8a,0xf0,0xf7,0xff,0xff]
+ vgetexpps -2064(%rdx), %xmm17
+
+// CHECK: vgetexpps 508(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x4a,0x7f]
+ vgetexpps 508(%rdx){1to4}, %xmm17
+
+// CHECK: vgetexpps 512(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x8a,0x00,0x02,0x00,0x00]
+ vgetexpps 512(%rdx){1to4}, %xmm17
+
+// CHECK: vgetexpps -512(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x4a,0x80]
+ vgetexpps -512(%rdx){1to4}, %xmm17
+
+// CHECK: vgetexpps -516(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x8a,0xfc,0xfd,0xff,0xff]
+ vgetexpps -516(%rdx){1to4}, %xmm17
+
+// CHECK: vgetexpps %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x42,0xf5]
+ vgetexpps %ymm29, %ymm30
+
+// CHECK: vgetexpps %ymm29, %ymm30 {%k6}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x42,0xf5]
+ vgetexpps %ymm29, %ymm30 {%k6}
+
+// CHECK: vgetexpps %ymm29, %ymm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x42,0xf5]
+ vgetexpps %ymm29, %ymm30 {%k6} {z}
+
+// CHECK: vgetexpps (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x31]
+ vgetexpps (%rcx), %ymm30
+
+// CHECK: vgetexpps 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x42,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpps 291(%rax,%r14,8), %ymm30
+
+// CHECK: vgetexpps (%rcx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x31]
+ vgetexpps (%rcx){1to8}, %ymm30
+
+// CHECK: vgetexpps 4064(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x72,0x7f]
+ vgetexpps 4064(%rdx), %ymm30
+
+// CHECK: vgetexpps 4096(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0xb2,0x00,0x10,0x00,0x00]
+ vgetexpps 4096(%rdx), %ymm30
+
+// CHECK: vgetexpps -4096(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x72,0x80]
+ vgetexpps -4096(%rdx), %ymm30
+
+// CHECK: vgetexpps -4128(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0xb2,0xe0,0xef,0xff,0xff]
+ vgetexpps -4128(%rdx), %ymm30
+
+// CHECK: vgetexpps 508(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x72,0x7f]
+ vgetexpps 508(%rdx){1to8}, %ymm30
+
+// CHECK: vgetexpps 512(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0xb2,0x00,0x02,0x00,0x00]
+ vgetexpps 512(%rdx){1to8}, %ymm30
+
+// CHECK: vgetexpps -512(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x72,0x80]
+ vgetexpps -512(%rdx){1to8}, %ymm30
+
+// CHECK: vgetexpps -516(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0xb2,0xfc,0xfd,0xff,0xff]
+ vgetexpps -516(%rdx){1to8}, %ymm30
+
// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29
// CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0xab]
vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29