From a1fa0de25819bfe22b64c3fde7417f4667a24fcf Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 21 Apr 2015 10:27:40 +0000 Subject: [PATCH] AVX-512: Added logical and arithmetic instructions for SKX by Asaf Badouh (asaf.badouh@intel.com) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235375 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 213 ++- lib/Target/X86/X86ISelLowering.cpp | 6 +- lib/Target/X86/X86InstrAVX512.td | 35 +- lib/Target/X86/X86IntrinsicsInfo.h | 219 ++- test/CodeGen/X86/avx512-intrinsics.ll | 77 + test/CodeGen/X86/avx512bwvl-intrinsics.ll | 423 +++++ test/CodeGen/X86/avx512vl-intrinsics.ll | 1091 +++++++++++++ test/MC/X86/x86-64-avx512dq.s | 895 ++++++++++ test/MC/X86/x86-64-avx512dq_vl.s | 1793 +++++++++++++++++++++ test/MC/X86/x86-64-avx512f_vl.s | 224 +++ 10 files changed, 4893 insertions(+), 83 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 49231d814f3..51f760d05c4 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3089,21 +3089,57 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". } //Bitwise Ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx512_mask_pand_d_128 : GCCBuiltin<"__builtin_ia32_pandd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pand_d_256 : GCCBuiltin<"__builtin_ia32_pandd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pand_q_128 : GCCBuiltin<"__builtin_ia32_pandq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pand_q_256 : GCCBuiltin<"__builtin_ia32_pandq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_por_d_128 : GCCBuiltin<"__builtin_ia32_pord128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_por_d_256 : GCCBuiltin<"__builtin_ia32_pord256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_por_d_512 : GCCBuiltin<"__builtin_ia32_pord512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_por_q_128 : GCCBuiltin<"__builtin_ia32_porq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_por_q_256 : GCCBuiltin<"__builtin_ia32_porq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + 
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_por_q_512 : GCCBuiltin<"__builtin_ia32_porq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pxor_d_128 : GCCBuiltin<"__builtin_ia32_pxord128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pxor_d_256 : GCCBuiltin<"__builtin_ia32_pxord256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pxor_d_512 : GCCBuiltin<"__builtin_ia32_pxord512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pxor_q_128 : GCCBuiltin<"__builtin_ia32_pxorq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pxor_q_256 : GCCBuiltin<"__builtin_ia32_pxorq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pxor_q_512 : GCCBuiltin<"__builtin_ia32_pxorq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -3235,28 +3271,203 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; } - +// FP logical ops +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_and_pd_128 : GCCBuiltin<"__builtin_ia32_andpd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_and_pd_256 : GCCBuiltin<"__builtin_ia32_andpd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_and_pd_512 : GCCBuiltin<"__builtin_ia32_andpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_and_ps_128 : GCCBuiltin<"__builtin_ia32_andps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_and_ps_256 : GCCBuiltin<"__builtin_ia32_andps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_and_ps_512 : GCCBuiltin<"__builtin_ia32_andps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_andn_pd_128 : GCCBuiltin<"__builtin_ia32_andnpd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_andn_pd_256 : GCCBuiltin<"__builtin_ia32_andnpd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_andn_pd_512 : GCCBuiltin<"__builtin_ia32_andnpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_andn_ps_128 : GCCBuiltin<"__builtin_ia32_andnps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def 
int_x86_avx512_mask_andn_ps_256 : GCCBuiltin<"__builtin_ia32_andnps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_andn_ps_512 : GCCBuiltin<"__builtin_ia32_andnps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_or_pd_128 : GCCBuiltin<"__builtin_ia32_orpd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_or_pd_256 : GCCBuiltin<"__builtin_ia32_orpd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_or_pd_512 : GCCBuiltin<"__builtin_ia32_orpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_or_ps_128 : GCCBuiltin<"__builtin_ia32_orps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_or_ps_256 : GCCBuiltin<"__builtin_ia32_orps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_or_ps_512 : GCCBuiltin<"__builtin_ia32_orps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_xor_pd_128 : GCCBuiltin<"__builtin_ia32_xorpd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_xor_pd_256 : GCCBuiltin<"__builtin_ia32_xorpd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_xor_pd_512 : GCCBuiltin<"__builtin_ia32_xorpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, 
llvm_v8f64_ty, + llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_xor_ps_128 : GCCBuiltin<"__builtin_ia32_xorps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_xor_ps_256 : GCCBuiltin<"__builtin_ia32_xorps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_xor_ps_512 : GCCBuiltin<"__builtin_ia32_xorps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; +} // Integer arithmetic ops let TargetPrefix = "x86" in { + def int_x86_avx512_mask_padd_b_128 : GCCBuiltin<"__builtin_ia32_paddb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_b_256 : GCCBuiltin<"__builtin_ia32_paddb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_b_512 : GCCBuiltin<"__builtin_ia32_paddb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_w_128 : GCCBuiltin<"__builtin_ia32_paddw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_w_256 : GCCBuiltin<"__builtin_ia32_paddw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_w_512 : GCCBuiltin<"__builtin_ia32_paddw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_d_128 : GCCBuiltin<"__builtin_ia32_paddd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], 
[IntrNoMem]>; + def int_x86_avx512_mask_padd_d_256 : GCCBuiltin<"__builtin_ia32_paddd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_padd_d_512 : GCCBuiltin<"__builtin_ia32_paddd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_q_128 : GCCBuiltin<"__builtin_ia32_paddq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padd_q_256 : GCCBuiltin<"__builtin_ia32_paddq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_padd_q_512 : GCCBuiltin<"__builtin_ia32_paddq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_b_128 : GCCBuiltin<"__builtin_ia32_psubb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_b_256 : GCCBuiltin<"__builtin_ia32_psubb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_b_512 : GCCBuiltin<"__builtin_ia32_psubb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_w_128 : GCCBuiltin<"__builtin_ia32_psubw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_w_256 : GCCBuiltin<"__builtin_ia32_psubw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_w_512 : GCCBuiltin<"__builtin_ia32_psubw512_mask">, + Intrinsic<[llvm_v32i16_ty], 
[llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_d_128 : GCCBuiltin<"__builtin_ia32_psubd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_d_256 : GCCBuiltin<"__builtin_ia32_psubd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_psub_d_512 : GCCBuiltin<"__builtin_ia32_psubd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_q_128 : GCCBuiltin<"__builtin_ia32_psubq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psub_q_256 : GCCBuiltin<"__builtin_ia32_psubq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_psub_q_512 : GCCBuiltin<"__builtin_ia32_psubq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmulu_dq_128 : GCCBuiltin<"__builtin_ia32_pmuludq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmul_dq_128 : GCCBuiltin<"__builtin_ia32_pmuldq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmulu_dq_256 : GCCBuiltin<"__builtin_ia32_pmuludq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmul_dq_256 : GCCBuiltin<"__builtin_ia32_pmuldq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmulu_dq_512 : 
GCCBuiltin<"__builtin_ia32_pmuludq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_w_128 : GCCBuiltin<"__builtin_ia32_pmullw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_w_256 : GCCBuiltin<"__builtin_ia32_pmullw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_w_512 : GCCBuiltin<"__builtin_ia32_pmullw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_d_128 : GCCBuiltin<"__builtin_ia32_pmulld128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_d_256 : GCCBuiltin<"__builtin_ia32_pmulld256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_d_512 : GCCBuiltin<"__builtin_ia32_pmulld512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_q_128 : GCCBuiltin<"__builtin_ia32_pmullq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_q_256 : GCCBuiltin<"__builtin_ia32_pmullq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmull_q_512 : GCCBuiltin<"__builtin_ia32_pmullq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, 
llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } + // Gather and Scatter ops let TargetPrefix = "x86" in { def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 762ad72d0be..1271d8d2aca 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1373,7 +1373,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); } - + if (Subtarget->hasDQI()) { + setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v4i64, Legal); + setOperationAction(ISD::MUL, MVT::v8i64, Legal); + } // Custom lower several nodes. for (MVT VT : MVT::vector_valuetypes()) { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 095916233c8..b74bd2a72b5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2996,7 +2996,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins, OpcodeStr, "${src2}"##_Dst.BroadcastStr##", $src1", "$src1, ${src2}"##_Dst.BroadcastStr, - (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32 + (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Dst.VT (X86VBroadcast (_Dst.ScalarLdFrag addr:$src2)))))), "", itins.rm>, @@ -3015,13 +3015,27 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P, - X86pmuldq, v16i32_info, v8i64_info, 1>, - T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins, + SDNode OpNode, bit IsCommutable = 0> { -defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", 
SSE_INTMUL_ITINS_P, - X86pmuludq, v16i32_info, v8i64_info, 1>, - EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; + defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode, + v16i32_info, v8i64_info, IsCommutable>, + EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode, + v8i32x_info, v4i64x_info, IsCommutable>, + EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; + defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode, + v4i32x_info, v2i64x_info, IsCommutable>, + EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; + } +} + +defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, + X86pmuldq, 1>,T8PD; +defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, + X86pmuludq, 1>; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; @@ -3340,7 +3354,12 @@ defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>; defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>; - +let Predicates = [HasDQI] in { + defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, 1>; + defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>; + defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, 1>; + defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>; +} def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 28a3b7b11e0..f16ce830cd8 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -171,81 +171,93 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) { * the alphabetical order. 
*/ static const IntrinsicData IntrinsicsWithoutChain[] = { - X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0), - X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), - X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0), - X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0), - X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0), - X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), - 
X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0), - X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), - X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), - X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), - X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0), - X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), - X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), - X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0), - X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0), - X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0), - X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0), - X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0), - X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0), - X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0), - X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0), - X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0), - X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0), - X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0), - X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0), - X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, 
X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), - X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), - X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), - X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0), - X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0), + X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0), + X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0), + X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0), + X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, 
X86ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0), + X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0), + X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0), + X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0), + X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, 
X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), + X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), + X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD, - X86ISD::FADD_RND), + X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD, - X86ISD::FADD_RND), + X86ISD::FADD_RND), + 
X86_INTRINSIC_DATA(avx512_mask_and_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0), + X86_INTRINSIC_DATA(avx512_mask_and_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0), + X86_INTRINSIC_DATA(avx512_mask_and_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0), + X86_INTRINSIC_DATA(avx512_mask_and_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0), + X86_INTRINSIC_DATA(avx512_mask_and_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0), + X86_INTRINSIC_DATA(avx512_mask_and_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0), + X86_INTRINSIC_DATA(avx512_mask_andn_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0), + X86_INTRINSIC_DATA(avx512_mask_andn_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0), + X86_INTRINSIC_DATA(avx512_mask_andn_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0), + X86_INTRINSIC_DATA(avx512_mask_andn_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0), + X86_INTRINSIC_DATA(avx512_mask_andn_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0), + X86_INTRINSIC_DATA(avx512_mask_andn_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0), X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0), @@ -334,9 +346,29 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), + X86_INTRINSIC_DATA(avx512_mask_or_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_or_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_or_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + 
X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_d_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_d_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0), X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_q_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_q_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0), X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_q_128, INTR_TYPE_2OP_MASK, ISD::AND, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_q_256, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0), @@ -362,11 +394,32 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK, + X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK, + X86ISD::PMULDQ, 0), 
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_q_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_q_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_q_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_w_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_w_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmull_w_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_128, INTR_TYPE_2OP_MASK, + X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_256, INTR_TYPE_2OP_MASK, + X86ISD::PMULUDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK, X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_por_d_128, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_por_d_256, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), @@ -386,9 +439,23 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + 
X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_b_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_b_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_d_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_d_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0), X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_q_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_q_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0), X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), @@ -410,6 +477,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), + X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + 
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 07d984a0ee9..59a9c71a336 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -2163,3 +2163,80 @@ define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask } declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) + +define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { + ;CHECK-LABEL: test_mask_mullo_epi32_rr_512 + ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1] + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512 + ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1] + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512 + ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1] + %res = call <16 x 
i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_mullo_epi32_rm_512 + ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07] + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512 + ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f] + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512 + ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07] + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512 + ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x 
i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512 + ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512 + ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index cffa0a5e9b7..03a0466ab19 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -996,3 +996,426 @@ define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind ret <4 x double> %res } +define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_add_epi16_rr_128 + ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> 
%a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rrk_128 + ;CHECK: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] + %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rrkz_128 + ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_add_epi16_rm_128 + ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rmk_128 + ;CHECK: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rmkz_128 + ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, 
<8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_add_epi16_rr_256 + ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rrk_256 + ;CHECK: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] + %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rrkz_256 + ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_add_epi16_rm_256 + ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rmk_256 + ;CHECK: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f] + %b = load <16 x i16>, <16 x i16>* %ptr_b + 
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rmkz_256 + ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_sub_epi16_rr_128 + ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rrk_128 + ;CHECK: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] + %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rrkz_128 + ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_sub_epi16_rm_128 + ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07] + %b = load <8 x 
i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rmk_128 + ;CHECK: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rmkz_128 + ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_sub_epi16_rr_256 + ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rrk_256 + ;CHECK: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] + %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rrkz_256 + 
;CHECK: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_sub_epi16_rm_256 + ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rmk_256 + ;CHECK: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rmkz_256 + ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_add_epi16_rr_512 + ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> 
@test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rrk_512 + ;CHECK: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1] + %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rrkz_512 + ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_add_epi16_rm_512 + ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rmk_512 + ;CHECK: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_add_epi16_rmkz_512 + ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, 
i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_sub_epi16_rr_512 + ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rrk_512 + ;CHECK: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1] + %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rrkz_512 + ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_sub_epi16_rm_512 + ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rmk_512 + ;CHECK: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> 
@llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_sub_epi16_rmkz_512 + ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_mullo_epi16_rr_512 + ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rrk_512 + ;CHECK: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1] + %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_512 + ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_mullo_epi16_rm_512 + ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: 
[0x62,0xf1,0x7d,0x48,0xd5,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rmk_512 + ;CHECK: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_512 + ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_mullo_epi16_rr_128 + ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rrk_128 + ;CHECK: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] + %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> 
@test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_128 + ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_mullo_epi16_rm_128 + ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rmk_128 + ;CHECK: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_128 + ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_mullo_epi16_rr_256 + ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 
x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rrk_256 + ;CHECK: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] + %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_256 + ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_mullo_epi16_rm_256 + ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rmk_256 + ;CHECK: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_256 + ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07] + %b = load <16 x i16>, <16 x i16>* 
%ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index 38a7e7afa5c..e61860df718 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -862,3 +862,1094 @@ define <2 x i64> @test_x86_mask_blend_q_128(i8 %a0, <2 x i64> %a1, <2 x i64> %a2 ret <2 x i64> %res } declare <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64>, <2 x i64>, i8) nounwind readonly + + +define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { + ;CHECK-LABEL: test_mask_mul_epi32_rr_128 + ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1] + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rrk_128 + ;CHECK: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rrkz_128 + ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epi32_rm_128 + ;CHECK: vpmuldq (%rdi), 
%xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07] + %b = load < 4 x i32>, < 4 x i32>* %ptr_b + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmk_128 + ;CHECK: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f] + %b = load < 4 x i32>, < 4 x i32>* %ptr_b + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmkz_128 + ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07] + %b = load < 4 x i32>, < 4 x i32>* %ptr_b + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epi32_rmb_128 + ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer + %b = bitcast < 2 x i64> %b64 to < 4 x i32> + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmbk_128 + ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f] + %q = load i64, i64* %ptr_b + 
%vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer + %b = bitcast < 2 x i64> %b64 to < 4 x i32> + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_128 + ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer + %b = bitcast < 2 x i64> %b64 to < 4 x i32> + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) + ret < 2 x i64> %res +} + +declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) + +define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { + ;CHECK-LABEL: test_mask_mul_epi32_rr_256 + ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1] + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rrk_256 + ;CHECK: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rrkz_256 + ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: 
[0x62,0xf2,0xfd,0xa9,0x28,0xc1] + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epi32_rm_256 + ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07] + %b = load < 8 x i32>, < 8 x i32>* %ptr_b + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmk_256 + ;CHECK: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f] + %b = load < 8 x i32>, < 8 x i32>* %ptr_b + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmkz_256 + ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07] + %b = load < 8 x i32>, < 8 x i32>* %ptr_b + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epi32_rmb_256 + ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer + %b = bitcast < 4 x i64> %b64 to < 8 x i32> + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x 
i64> zeroinitializer, i8 -1) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmbk_256 + ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer + %b = bitcast < 4 x i64> %b64 to < 8 x i32> + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_256 + ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer + %b = bitcast < 4 x i64> %b64 to < 8 x i32> + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) + ret < 4 x i64> %res +} + +declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) + +define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { + ;CHECK-LABEL: test_mask_mul_epu32_rr_128 + ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1] + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rrk_128 + ;CHECK: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] + %res = 
call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rrkz_128 + ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epu32_rm_128 + ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0x07] + %b = load < 4 x i32>, < 4 x i32>* %ptr_b + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmk_128 + ;CHECK: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f] + %b = load < 4 x i32>, < 4 x i32>* %ptr_b + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmkz_128 + ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07] + %b = load < 4 x i32>, < 4 x i32>* %ptr_b + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epu32_rmb_128 + ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0xfd,0x18,0xf4,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer + %b = bitcast < 2 x i64> %b64 to < 4 x i32> + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmbk_128 + ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer + %b = bitcast < 2 x i64> %b64 to < 4 x i32> + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) + ret < 2 x i64> %res +} + +define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_128 + ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer + %b = bitcast < 2 x i64> %b64 to < 4 x i32> + %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) + ret < 2 x i64> %res +} + +declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) + +define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { + ;CHECK-LABEL: test_mask_mul_epu32_rr_256 + ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0xc1] + %res = call < 4 x i64> 
@llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rrk_256 + ;CHECK: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rrkz_256 + ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epu32_rm_256 + ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0x07] + %b = load < 8 x i32>, < 8 x i32>* %ptr_b + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmk_256 + ;CHECK: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f] + %b = load < 8 x i32>, < 8 x i32>* %ptr_b + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmkz_256 + ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07] + %b = load < 8 
x i32>, < 8 x i32>* %ptr_b + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { + ;CHECK-LABEL: test_mask_mul_epu32_rmb_256 + ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer + %b = bitcast < 4 x i64> %b64 to < 8 x i32> + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmbk_256 + ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer + %b = bitcast < 4 x i64> %b64 to < 8 x i32> + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) + ret < 4 x i64> %res +} + +define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_256 + ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07] + %q = load i64, i64* %ptr_b + %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer + %b = bitcast < 4 x i64> %b64 to < 8 x i32> + %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) + ret < 
4 x i64> %res +} + +declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) + +define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { + ;CHECK-LABEL: test_mask_add_epi32_rr_128 + ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rrk_128 + ;CHECK: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1] + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rrkz_128 + ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_add_epi32_rm_128 + ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rmk_128 + ;CHECK: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> 
%passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rmkz_128 + ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_add_epi32_rmb_128 + ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rmbk_128 + ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rmbkz_128 + ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> 
@llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { + ;CHECK-LABEL: test_mask_sub_epi32_rr_128 + ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rrk_128 + ;CHECK: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rrkz_128 + ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_sub_epi32_rm_128 + ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmk_128 + ;CHECK: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res =
call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmkz_128 + ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_sub_epi32_rmb_128 + ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmbk_128 + ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_128 + ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> 
%vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { + ;CHECK-LABEL: test_mask_sub_epi32_rr_256 + ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rrk_256 + ;CHECK: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rrkz_256 + ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_sub_epi32_rm_256 + ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmk_256 + ;CHECK: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## 
encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmkz_256 + ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_sub_epi32_rmb_256 + ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmbk_256 + ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_256 + ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07] + %q = load i32, i32* 
%ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { + ;CHECK-LABEL: test_mask_add_epi32_rr_256 + ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rrk_256 + ;CHECK: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rrkz_256 + ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_add_epi32_rm_256 + ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 
+ ;CHECK-LABEL: test_mask_add_epi32_rmk_256 + ;CHECK: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rmkz_256 + ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_add_epi32_rmb_256 + ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rmbk_256 + ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_add_epi32_rmbkz_256 + ;CHECK: vpaddd (%rdi){1to8}, 
%ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { + ;CHECK-LABEL: test_mask_and_epi32_rr_128 + ;CHECK: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rrk_128 + ;CHECK: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1] + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rrkz_128 + ;CHECK: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_and_epi32_rm_128 + ;CHECK: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> 
@test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmk_128 + ;CHECK: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmkz_128 + ;CHECK: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_and_epi32_rmb_128 + ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmbk_128 + ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* 
%ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmbkz_128 + ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { + ;CHECK-LABEL: test_mask_and_epi32_rr_256 + ;CHECK: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rrk_256 + ;CHECK: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1] + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rrkz_256 + ;CHECK: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_and_epi32_rm_256 + ;CHECK: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, 
<8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmk_256 + ;CHECK: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmkz_256 + ;CHECK: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_and_epi32_rmb_256 + ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmbk_256 + ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 
x i32> %res +} + +define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_and_epi32_rmbkz_256 + ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { + ;CHECK-LABEL: test_mask_or_epi32_rr_128 + ;CHECK: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rrk_128 + ;CHECK: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1] + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rrkz_128 + ;CHECK: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_or_epi32_rm_128 + ;CHECK: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + 
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmk_128 + ;CHECK: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmkz_128 + ;CHECK: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_or_epi32_rmb_128 + ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmbk_128 + ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, 
<4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmbkz_128 + ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { + ;CHECK-LABEL: test_mask_or_epi32_rr_256 + ;CHECK: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rrk_256 + ;CHECK: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1] + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rrkz_256 + ;CHECK: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_or_epi32_rm_256 + ;CHECK: vpord (%rdi), %ymm0, %ymm0 ## encoding: 
[0x62,0xf1,0x7d,0x28,0xeb,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmk_256 + ;CHECK: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmkz_256 + ;CHECK: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_or_epi32_rmb_256 + ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmbk_256 + ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> 
zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_or_epi32_rmbkz_256 + ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { + ;CHECK-LABEL: test_mask_xor_epi32_rr_128 + ;CHECK: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rrk_128 + ;CHECK: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1] + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rrkz_128 + ;CHECK: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { + ;CHECK-LABEL: 
test_mask_xor_epi32_rm_128 + ;CHECK: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmk_128 + ;CHECK: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmkz_128 + ;CHECK: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_xor_epi32_rmb_128 + ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmbk_128 + ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, 
i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128 + ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { + ;CHECK-LABEL: test_mask_xor_epi32_rr_256 + ;CHECK: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rrk_256 + ;CHECK: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rrkz_256 + ;CHECK: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define 
<8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_xor_epi32_rm_256 + ;CHECK: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmk_256 + ;CHECK: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmkz_256 + ;CHECK: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_xor_epi32_rmb_256 + ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmbk_256 + ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: 
[0x62,0xf1,0x7d,0x39,0xef,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_256 + ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) \ No newline at end of file diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s index aac176517bf..e4bffa6b265 100644 --- a/test/MC/X86/x86-64-avx512dq.s +++ b/test/MC/X86/x86-64-avx512dq.s @@ -127,3 +127,898 @@ // CHECK: kmovb %k3, %r13d // CHECK: encoding: [0xc5,0x79,0x93,0xeb] kmovb %k3, %r13d + +// CHECK: vandpd %zmm27, %zmm28, %zmm19 +// CHECK: encoding: [0x62,0x81,0x9d,0x40,0x54,0xdb] + vandpd %zmm27, %zmm28, %zmm19 + +// CHECK: vandpd %zmm27, %zmm28, %zmm19 {%k5} +// CHECK: encoding: [0x62,0x81,0x9d,0x45,0x54,0xdb] + vandpd %zmm27, %zmm28, %zmm19 {%k5} + +// CHECK: vandpd %zmm27, %zmm28, %zmm19 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x9d,0xc5,0x54,0xdb] + vandpd %zmm27, %zmm28, %zmm19 {%k5} {z} + +// CHECK: vandpd (%rcx), %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x19] + vandpd (%rcx), %zmm28, %zmm19 + +// CHECK: vandpd 291(%rax,%r14,8), %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x54,0x9c,0xf0,0x23,0x01,0x00,0x00] + vandpd 
291(%rax,%r14,8), %zmm28, %zmm19 + +// CHECK: vandpd (%rcx){1to8}, %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x19] + vandpd (%rcx){1to8}, %zmm28, %zmm19 + +// CHECK: vandpd 8128(%rdx), %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x5a,0x7f] + vandpd 8128(%rdx), %zmm28, %zmm19 + +// CHECK: vandpd 8192(%rdx), %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x9a,0x00,0x20,0x00,0x00] + vandpd 8192(%rdx), %zmm28, %zmm19 + +// CHECK: vandpd -8192(%rdx), %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x5a,0x80] + vandpd -8192(%rdx), %zmm28, %zmm19 + +// CHECK: vandpd -8256(%rdx), %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x9a,0xc0,0xdf,0xff,0xff] + vandpd -8256(%rdx), %zmm28, %zmm19 + +// CHECK: vandpd 1016(%rdx){1to8}, %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x5a,0x7f] + vandpd 1016(%rdx){1to8}, %zmm28, %zmm19 + +// CHECK: vandpd 1024(%rdx){1to8}, %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x9a,0x00,0x04,0x00,0x00] + vandpd 1024(%rdx){1to8}, %zmm28, %zmm19 + +// CHECK: vandpd -1024(%rdx){1to8}, %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x5a,0x80] + vandpd -1024(%rdx){1to8}, %zmm28, %zmm19 + +// CHECK: vandpd -1032(%rdx){1to8}, %zmm28, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x9a,0xf8,0xfb,0xff,0xff] + vandpd -1032(%rdx){1to8}, %zmm28, %zmm19 + +// CHECK: vandps %zmm25, %zmm22, %zmm17 +// CHECK: encoding: [0x62,0x81,0x4c,0x40,0x54,0xc9] + vandps %zmm25, %zmm22, %zmm17 + +// CHECK: vandps %zmm25, %zmm22, %zmm17 {%k4} +// CHECK: encoding: [0x62,0x81,0x4c,0x44,0x54,0xc9] + vandps %zmm25, %zmm22, %zmm17 {%k4} + +// CHECK: vandps %zmm25, %zmm22, %zmm17 {%k4} {z} +// CHECK: encoding: [0x62,0x81,0x4c,0xc4,0x54,0xc9] + vandps %zmm25, %zmm22, %zmm17 {%k4} {z} + +// CHECK: vandps (%rcx), %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x09] + vandps (%rcx), %zmm22, %zmm17 + +// CHECK: vandps 291(%rax,%r14,8), 
%zmm22, %zmm17 +// CHECK: encoding: [0x62,0xa1,0x4c,0x40,0x54,0x8c,0xf0,0x23,0x01,0x00,0x00] + vandps 291(%rax,%r14,8), %zmm22, %zmm17 + +// CHECK: vandps (%rcx){1to16}, %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x09] + vandps (%rcx){1to16}, %zmm22, %zmm17 + +// CHECK: vandps 8128(%rdx), %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x4a,0x7f] + vandps 8128(%rdx), %zmm22, %zmm17 + +// CHECK: vandps 8192(%rdx), %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x8a,0x00,0x20,0x00,0x00] + vandps 8192(%rdx), %zmm22, %zmm17 + +// CHECK: vandps -8192(%rdx), %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x4a,0x80] + vandps -8192(%rdx), %zmm22, %zmm17 + +// CHECK: vandps -8256(%rdx), %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x8a,0xc0,0xdf,0xff,0xff] + vandps -8256(%rdx), %zmm22, %zmm17 + +// CHECK: vandps 508(%rdx){1to16}, %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x4a,0x7f] + vandps 508(%rdx){1to16}, %zmm22, %zmm17 + +// CHECK: vandps 512(%rdx){1to16}, %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x8a,0x00,0x02,0x00,0x00] + vandps 512(%rdx){1to16}, %zmm22, %zmm17 + +// CHECK: vandps -512(%rdx){1to16}, %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x4a,0x80] + vandps -512(%rdx){1to16}, %zmm22, %zmm17 + +// CHECK: vandps -516(%rdx){1to16}, %zmm22, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x8a,0xfc,0xfd,0xff,0xff] + vandps -516(%rdx){1to16}, %zmm22, %zmm17 + +// CHECK: vandnpd %zmm22, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xa1,0xf5,0x40,0x55,0xe6] + vandnpd %zmm22, %zmm17, %zmm20 + +// CHECK: vandnpd %zmm22, %zmm17, %zmm20 {%k1} +// CHECK: encoding: [0x62,0xa1,0xf5,0x41,0x55,0xe6] + vandnpd %zmm22, %zmm17, %zmm20 {%k1} + +// CHECK: vandnpd %zmm22, %zmm17, %zmm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0xf5,0xc1,0x55,0xe6] + vandnpd %zmm22, %zmm17, %zmm20 {%k1} {z} + +// CHECK: vandnpd (%rcx), %zmm17, %zmm20 +// CHECK: 
encoding: [0x62,0xe1,0xf5,0x40,0x55,0x21] + vandnpd (%rcx), %zmm17, %zmm20 + +// CHECK: vandnpd 291(%rax,%r14,8), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xa1,0xf5,0x40,0x55,0xa4,0xf0,0x23,0x01,0x00,0x00] + vandnpd 291(%rax,%r14,8), %zmm17, %zmm20 + +// CHECK: vandnpd (%rcx){1to8}, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x21] + vandnpd (%rcx){1to8}, %zmm17, %zmm20 + +// CHECK: vandnpd 8128(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x62,0x7f] + vandnpd 8128(%rdx), %zmm17, %zmm20 + +// CHECK: vandnpd 8192(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0xa2,0x00,0x20,0x00,0x00] + vandnpd 8192(%rdx), %zmm17, %zmm20 + +// CHECK: vandnpd -8192(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x62,0x80] + vandnpd -8192(%rdx), %zmm17, %zmm20 + +// CHECK: vandnpd -8256(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0xa2,0xc0,0xdf,0xff,0xff] + vandnpd -8256(%rdx), %zmm17, %zmm20 + +// CHECK: vandnpd 1016(%rdx){1to8}, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x62,0x7f] + vandnpd 1016(%rdx){1to8}, %zmm17, %zmm20 + +// CHECK: vandnpd 1024(%rdx){1to8}, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0xa2,0x00,0x04,0x00,0x00] + vandnpd 1024(%rdx){1to8}, %zmm17, %zmm20 + +// CHECK: vandnpd -1024(%rdx){1to8}, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x62,0x80] + vandnpd -1024(%rdx){1to8}, %zmm17, %zmm20 + +// CHECK: vandnpd -1032(%rdx){1to8}, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0xa2,0xf8,0xfb,0xff,0xff] + vandnpd -1032(%rdx){1to8}, %zmm17, %zmm20 + +// CHECK: vandnps %zmm19, %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xa1,0x74,0x40,0x55,0xf3] + vandnps %zmm19, %zmm17, %zmm22 + +// CHECK: vandnps %zmm19, %zmm17, %zmm22 {%k2} +// CHECK: encoding: [0x62,0xa1,0x74,0x42,0x55,0xf3] + vandnps %zmm19, %zmm17, %zmm22 {%k2} + +// CHECK: vandnps %zmm19, %zmm17, %zmm22 {%k2} {z} +// CHECK: encoding: 
[0x62,0xa1,0x74,0xc2,0x55,0xf3] + vandnps %zmm19, %zmm17, %zmm22 {%k2} {z} + +// CHECK: vandnps (%rcx), %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x31] + vandnps (%rcx), %zmm17, %zmm22 + +// CHECK: vandnps 291(%rax,%r14,8), %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xa1,0x74,0x40,0x55,0xb4,0xf0,0x23,0x01,0x00,0x00] + vandnps 291(%rax,%r14,8), %zmm17, %zmm22 + +// CHECK: vandnps (%rcx){1to16}, %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x31] + vandnps (%rcx){1to16}, %zmm17, %zmm22 + +// CHECK: vandnps 8128(%rdx), %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x72,0x7f] + vandnps 8128(%rdx), %zmm17, %zmm22 + +// CHECK: vandnps 8192(%rdx), %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0xb2,0x00,0x20,0x00,0x00] + vandnps 8192(%rdx), %zmm17, %zmm22 + +// CHECK: vandnps -8192(%rdx), %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x72,0x80] + vandnps -8192(%rdx), %zmm17, %zmm22 + +// CHECK: vandnps -8256(%rdx), %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0xb2,0xc0,0xdf,0xff,0xff] + vandnps -8256(%rdx), %zmm17, %zmm22 + +// CHECK: vandnps 508(%rdx){1to16}, %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x72,0x7f] + vandnps 508(%rdx){1to16}, %zmm17, %zmm22 + +// CHECK: vandnps 512(%rdx){1to16}, %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0xb2,0x00,0x02,0x00,0x00] + vandnps 512(%rdx){1to16}, %zmm17, %zmm22 + +// CHECK: vandnps -512(%rdx){1to16}, %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x72,0x80] + vandnps -512(%rdx){1to16}, %zmm17, %zmm22 + +// CHECK: vandnps -516(%rdx){1to16}, %zmm17, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0xb2,0xfc,0xfd,0xff,0xff] + vandnps -516(%rdx){1to16}, %zmm17, %zmm22 + +// CHECK: vorpd %zmm21, %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x56,0xf5] + vorpd %zmm21, %zmm22, %zmm30 + +// CHECK: vorpd %zmm21, %zmm22, %zmm30 {%k6} +// CHECK: encoding: 
[0x62,0x21,0xcd,0x46,0x56,0xf5] + vorpd %zmm21, %zmm22, %zmm30 {%k6} + +// CHECK: vorpd %zmm21, %zmm22, %zmm30 {%k6} {z} +// CHECK: encoding: [0x62,0x21,0xcd,0xc6,0x56,0xf5] + vorpd %zmm21, %zmm22, %zmm30 {%k6} {z} + +// CHECK: vorpd (%rcx), %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x31] + vorpd (%rcx), %zmm22, %zmm30 + +// CHECK: vorpd 291(%rax,%r14,8), %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x56,0xb4,0xf0,0x23,0x01,0x00,0x00] + vorpd 291(%rax,%r14,8), %zmm22, %zmm30 + +// CHECK: vorpd (%rcx){1to8}, %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x31] + vorpd (%rcx){1to8}, %zmm22, %zmm30 + +// CHECK: vorpd 8128(%rdx), %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x72,0x7f] + vorpd 8128(%rdx), %zmm22, %zmm30 + +// CHECK: vorpd 8192(%rdx), %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0xb2,0x00,0x20,0x00,0x00] + vorpd 8192(%rdx), %zmm22, %zmm30 + +// CHECK: vorpd -8192(%rdx), %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x72,0x80] + vorpd -8192(%rdx), %zmm22, %zmm30 + +// CHECK: vorpd -8256(%rdx), %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0xb2,0xc0,0xdf,0xff,0xff] + vorpd -8256(%rdx), %zmm22, %zmm30 + +// CHECK: vorpd 1016(%rdx){1to8}, %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x72,0x7f] + vorpd 1016(%rdx){1to8}, %zmm22, %zmm30 + +// CHECK: vorpd 1024(%rdx){1to8}, %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0xb2,0x00,0x04,0x00,0x00] + vorpd 1024(%rdx){1to8}, %zmm22, %zmm30 + +// CHECK: vorpd -1024(%rdx){1to8}, %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x72,0x80] + vorpd -1024(%rdx){1to8}, %zmm22, %zmm30 + +// CHECK: vorpd -1032(%rdx){1to8}, %zmm22, %zmm30 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0xb2,0xf8,0xfb,0xff,0xff] + vorpd -1032(%rdx){1to8}, %zmm22, %zmm30 + +// CHECK: vorps %zmm26, %zmm21, %zmm22 +// CHECK: encoding: [0x62,0x81,0x54,0x40,0x56,0xf2] + vorps %zmm26, %zmm21, 
%zmm22 + +// CHECK: vorps %zmm26, %zmm21, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x81,0x54,0x47,0x56,0xf2] + vorps %zmm26, %zmm21, %zmm22 {%k7} + +// CHECK: vorps %zmm26, %zmm21, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x81,0x54,0xc7,0x56,0xf2] + vorps %zmm26, %zmm21, %zmm22 {%k7} {z} + +// CHECK: vorps (%rcx), %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x31] + vorps (%rcx), %zmm21, %zmm22 + +// CHECK: vorps 291(%rax,%r14,8), %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x56,0xb4,0xf0,0x23,0x01,0x00,0x00] + vorps 291(%rax,%r14,8), %zmm21, %zmm22 + +// CHECK: vorps (%rcx){1to16}, %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x31] + vorps (%rcx){1to16}, %zmm21, %zmm22 + +// CHECK: vorps 8128(%rdx), %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x72,0x7f] + vorps 8128(%rdx), %zmm21, %zmm22 + +// CHECK: vorps 8192(%rdx), %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0xb2,0x00,0x20,0x00,0x00] + vorps 8192(%rdx), %zmm21, %zmm22 + +// CHECK: vorps -8192(%rdx), %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x72,0x80] + vorps -8192(%rdx), %zmm21, %zmm22 + +// CHECK: vorps -8256(%rdx), %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0xb2,0xc0,0xdf,0xff,0xff] + vorps -8256(%rdx), %zmm21, %zmm22 + +// CHECK: vorps 508(%rdx){1to16}, %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x72,0x7f] + vorps 508(%rdx){1to16}, %zmm21, %zmm22 + +// CHECK: vorps 512(%rdx){1to16}, %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0xb2,0x00,0x02,0x00,0x00] + vorps 512(%rdx){1to16}, %zmm21, %zmm22 + +// CHECK: vorps -512(%rdx){1to16}, %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x72,0x80] + vorps -512(%rdx){1to16}, %zmm21, %zmm22 + +// CHECK: vorps -516(%rdx){1to16}, %zmm21, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0xb2,0xfc,0xfd,0xff,0xff] + vorps -516(%rdx){1to16}, %zmm21, %zmm22 + +// CHECK: vxorpd %zmm24, %zmm24, %zmm27 
+// CHECK: encoding: [0x62,0x01,0xbd,0x40,0x57,0xd8] + vxorpd %zmm24, %zmm24, %zmm27 + +// CHECK: vxorpd %zmm24, %zmm24, %zmm27 {%k5} +// CHECK: encoding: [0x62,0x01,0xbd,0x45,0x57,0xd8] + vxorpd %zmm24, %zmm24, %zmm27 {%k5} + +// CHECK: vxorpd %zmm24, %zmm24, %zmm27 {%k5} {z} +// CHECK: encoding: [0x62,0x01,0xbd,0xc5,0x57,0xd8] + vxorpd %zmm24, %zmm24, %zmm27 {%k5} {z} + +// CHECK: vxorpd (%rcx), %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x19] + vxorpd (%rcx), %zmm24, %zmm27 + +// CHECK: vxorpd 291(%rax,%r14,8), %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x21,0xbd,0x40,0x57,0x9c,0xf0,0x23,0x01,0x00,0x00] + vxorpd 291(%rax,%r14,8), %zmm24, %zmm27 + +// CHECK: vxorpd (%rcx){1to8}, %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x19] + vxorpd (%rcx){1to8}, %zmm24, %zmm27 + +// CHECK: vxorpd 8128(%rdx), %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x5a,0x7f] + vxorpd 8128(%rdx), %zmm24, %zmm27 + +// CHECK: vxorpd 8192(%rdx), %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x9a,0x00,0x20,0x00,0x00] + vxorpd 8192(%rdx), %zmm24, %zmm27 + +// CHECK: vxorpd -8192(%rdx), %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x5a,0x80] + vxorpd -8192(%rdx), %zmm24, %zmm27 + +// CHECK: vxorpd -8256(%rdx), %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x9a,0xc0,0xdf,0xff,0xff] + vxorpd -8256(%rdx), %zmm24, %zmm27 + +// CHECK: vxorpd 1016(%rdx){1to8}, %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x5a,0x7f] + vxorpd 1016(%rdx){1to8}, %zmm24, %zmm27 + +// CHECK: vxorpd 1024(%rdx){1to8}, %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x9a,0x00,0x04,0x00,0x00] + vxorpd 1024(%rdx){1to8}, %zmm24, %zmm27 + +// CHECK: vxorpd -1024(%rdx){1to8}, %zmm24, %zmm27 +// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x5a,0x80] + vxorpd -1024(%rdx){1to8}, %zmm24, %zmm27 + +// CHECK: vxorpd -1032(%rdx){1to8}, %zmm24, %zmm27 +// CHECK: encoding: 
[0x62,0x61,0xbd,0x50,0x57,0x9a,0xf8,0xfb,0xff,0xff] + vxorpd -1032(%rdx){1to8}, %zmm24, %zmm27 + +// CHECK: vxorps %zmm19, %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x6c,0x40,0x57,0xd3] + vxorps %zmm19, %zmm18, %zmm18 + +// CHECK: vxorps %zmm19, %zmm18, %zmm18 {%k2} +// CHECK: encoding: [0x62,0xa1,0x6c,0x42,0x57,0xd3] + vxorps %zmm19, %zmm18, %zmm18 {%k2} + +// CHECK: vxorps %zmm19, %zmm18, %zmm18 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0x6c,0xc2,0x57,0xd3] + vxorps %zmm19, %zmm18, %zmm18 {%k2} {z} + +// CHECK: vxorps (%rcx), %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x11] + vxorps (%rcx), %zmm18, %zmm18 + +// CHECK: vxorps 291(%rax,%r14,8), %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x6c,0x40,0x57,0x94,0xf0,0x23,0x01,0x00,0x00] + vxorps 291(%rax,%r14,8), %zmm18, %zmm18 + +// CHECK: vxorps (%rcx){1to16}, %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x11] + vxorps (%rcx){1to16}, %zmm18, %zmm18 + +// CHECK: vxorps 8128(%rdx), %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x52,0x7f] + vxorps 8128(%rdx), %zmm18, %zmm18 + +// CHECK: vxorps 8192(%rdx), %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x92,0x00,0x20,0x00,0x00] + vxorps 8192(%rdx), %zmm18, %zmm18 + +// CHECK: vxorps -8192(%rdx), %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x52,0x80] + vxorps -8192(%rdx), %zmm18, %zmm18 + +// CHECK: vxorps -8256(%rdx), %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x92,0xc0,0xdf,0xff,0xff] + vxorps -8256(%rdx), %zmm18, %zmm18 + +// CHECK: vxorps 508(%rdx){1to16}, %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x52,0x7f] + vxorps 508(%rdx){1to16}, %zmm18, %zmm18 + +// CHECK: vxorps 512(%rdx){1to16}, %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x92,0x00,0x02,0x00,0x00] + vxorps 512(%rdx){1to16}, %zmm18, %zmm18 + +// CHECK: vxorps -512(%rdx){1to16}, %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x52,0x80] + vxorps 
-512(%rdx){1to16}, %zmm18, %zmm18 + +// CHECK: vxorps -516(%rdx){1to16}, %zmm18, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x92,0xfc,0xfd,0xff,0xff] + vxorps -516(%rdx){1to16}, %zmm18, %zmm18 +// CHECK: vandpd %zmm22, %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x54,0xc6] + vandpd %zmm22, %zmm22, %zmm24 + +// CHECK: vandpd %zmm22, %zmm22, %zmm24 {%k4} +// CHECK: encoding: [0x62,0x21,0xcd,0x44,0x54,0xc6] + vandpd %zmm22, %zmm22, %zmm24 {%k4} + +// CHECK: vandpd %zmm22, %zmm22, %zmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0xcd,0xc4,0x54,0xc6] + vandpd %zmm22, %zmm22, %zmm24 {%k4} {z} + +// CHECK: vandpd (%rcx), %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x01] + vandpd (%rcx), %zmm22, %zmm24 + +// CHECK: vandpd 4660(%rax,%r14,8), %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x54,0x84,0xf0,0x34,0x12,0x00,0x00] + vandpd 4660(%rax,%r14,8), %zmm22, %zmm24 + +// CHECK: vandpd (%rcx){1to8}, %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x01] + vandpd (%rcx){1to8}, %zmm22, %zmm24 + +// CHECK: vandpd 8128(%rdx), %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x42,0x7f] + vandpd 8128(%rdx), %zmm22, %zmm24 + +// CHECK: vandpd 8192(%rdx), %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x82,0x00,0x20,0x00,0x00] + vandpd 8192(%rdx), %zmm22, %zmm24 + +// CHECK: vandpd -8192(%rdx), %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x42,0x80] + vandpd -8192(%rdx), %zmm22, %zmm24 + +// CHECK: vandpd -8256(%rdx), %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x82,0xc0,0xdf,0xff,0xff] + vandpd -8256(%rdx), %zmm22, %zmm24 + +// CHECK: vandpd 1016(%rdx){1to8}, %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x42,0x7f] + vandpd 1016(%rdx){1to8}, %zmm22, %zmm24 + +// CHECK: vandpd 1024(%rdx){1to8}, %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x82,0x00,0x04,0x00,0x00] + vandpd 1024(%rdx){1to8}, %zmm22, %zmm24 + +// CHECK: vandpd 
-1024(%rdx){1to8}, %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x42,0x80] + vandpd -1024(%rdx){1to8}, %zmm22, %zmm24 + +// CHECK: vandpd -1032(%rdx){1to8}, %zmm22, %zmm24 +// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x82,0xf8,0xfb,0xff,0xff] + vandpd -1032(%rdx){1to8}, %zmm22, %zmm24 + +// CHECK: vandps %zmm23, %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x21,0x44,0x40,0x54,0xf7] + vandps %zmm23, %zmm23, %zmm30 + +// CHECK: vandps %zmm23, %zmm23, %zmm30 {%k5} +// CHECK: encoding: [0x62,0x21,0x44,0x45,0x54,0xf7] + vandps %zmm23, %zmm23, %zmm30 {%k5} + +// CHECK: vandps %zmm23, %zmm23, %zmm30 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0x44,0xc5,0x54,0xf7] + vandps %zmm23, %zmm23, %zmm30 {%k5} {z} + +// CHECK: vandps (%rcx), %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x31] + vandps (%rcx), %zmm23, %zmm30 + +// CHECK: vandps 4660(%rax,%r14,8), %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x21,0x44,0x40,0x54,0xb4,0xf0,0x34,0x12,0x00,0x00] + vandps 4660(%rax,%r14,8), %zmm23, %zmm30 + +// CHECK: vandps (%rcx){1to16}, %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x31] + vandps (%rcx){1to16}, %zmm23, %zmm30 + +// CHECK: vandps 8128(%rdx), %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x72,0x7f] + vandps 8128(%rdx), %zmm23, %zmm30 + +// CHECK: vandps 8192(%rdx), %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0xb2,0x00,0x20,0x00,0x00] + vandps 8192(%rdx), %zmm23, %zmm30 + +// CHECK: vandps -8192(%rdx), %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x72,0x80] + vandps -8192(%rdx), %zmm23, %zmm30 + +// CHECK: vandps -8256(%rdx), %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0xb2,0xc0,0xdf,0xff,0xff] + vandps -8256(%rdx), %zmm23, %zmm30 + +// CHECK: vandps 508(%rdx){1to16}, %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x72,0x7f] + vandps 508(%rdx){1to16}, %zmm23, %zmm30 + +// CHECK: vandps 512(%rdx){1to16}, %zmm23, %zmm30 +// CHECK: encoding: 
[0x62,0x61,0x44,0x50,0x54,0xb2,0x00,0x02,0x00,0x00] + vandps 512(%rdx){1to16}, %zmm23, %zmm30 + +// CHECK: vandps -512(%rdx){1to16}, %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x72,0x80] + vandps -512(%rdx){1to16}, %zmm23, %zmm30 + +// CHECK: vandps -516(%rdx){1to16}, %zmm23, %zmm30 +// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0xb2,0xfc,0xfd,0xff,0xff] + vandps -516(%rdx){1to16}, %zmm23, %zmm30 + +// CHECK: vandnpd %zmm21, %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x21,0xd5,0x40,0x55,0xcd] + vandnpd %zmm21, %zmm21, %zmm25 + +// CHECK: vandnpd %zmm21, %zmm21, %zmm25 {%k2} +// CHECK: encoding: [0x62,0x21,0xd5,0x42,0x55,0xcd] + vandnpd %zmm21, %zmm21, %zmm25 {%k2} + +// CHECK: vandnpd %zmm21, %zmm21, %zmm25 {%k2} {z} +// CHECK: encoding: [0x62,0x21,0xd5,0xc2,0x55,0xcd] + vandnpd %zmm21, %zmm21, %zmm25 {%k2} {z} + +// CHECK: vandnpd (%rcx), %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x09] + vandnpd (%rcx), %zmm21, %zmm25 + +// CHECK: vandnpd 4660(%rax,%r14,8), %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x21,0xd5,0x40,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00] + vandnpd 4660(%rax,%r14,8), %zmm21, %zmm25 + +// CHECK: vandnpd (%rcx){1to8}, %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x09] + vandnpd (%rcx){1to8}, %zmm21, %zmm25 + +// CHECK: vandnpd 8128(%rdx), %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x4a,0x7f] + vandnpd 8128(%rdx), %zmm21, %zmm25 + +// CHECK: vandnpd 8192(%rdx), %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x8a,0x00,0x20,0x00,0x00] + vandnpd 8192(%rdx), %zmm21, %zmm25 + +// CHECK: vandnpd -8192(%rdx), %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x4a,0x80] + vandnpd -8192(%rdx), %zmm21, %zmm25 + +// CHECK: vandnpd -8256(%rdx), %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x8a,0xc0,0xdf,0xff,0xff] + vandnpd -8256(%rdx), %zmm21, %zmm25 + +// CHECK: vandnpd 1016(%rdx){1to8}, %zmm21, %zmm25 +// CHECK: encoding: 
[0x62,0x61,0xd5,0x50,0x55,0x4a,0x7f] + vandnpd 1016(%rdx){1to8}, %zmm21, %zmm25 + +// CHECK: vandnpd 1024(%rdx){1to8}, %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x8a,0x00,0x04,0x00,0x00] + vandnpd 1024(%rdx){1to8}, %zmm21, %zmm25 + +// CHECK: vandnpd -1024(%rdx){1to8}, %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x4a,0x80] + vandnpd -1024(%rdx){1to8}, %zmm21, %zmm25 + +// CHECK: vandnpd -1032(%rdx){1to8}, %zmm21, %zmm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x8a,0xf8,0xfb,0xff,0xff] + vandnpd -1032(%rdx){1to8}, %zmm21, %zmm25 + +// CHECK: vandnps %zmm18, %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x55,0xca] + vandnps %zmm18, %zmm21, %zmm17 + +// CHECK: vandnps %zmm18, %zmm21, %zmm17 {%k1} +// CHECK: encoding: [0x62,0xa1,0x54,0x41,0x55,0xca] + vandnps %zmm18, %zmm21, %zmm17 {%k1} + +// CHECK: vandnps %zmm18, %zmm21, %zmm17 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x54,0xc1,0x55,0xca] + vandnps %zmm18, %zmm21, %zmm17 {%k1} {z} + +// CHECK: vandnps (%rcx), %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x09] + vandnps (%rcx), %zmm21, %zmm17 + +// CHECK: vandnps 4660(%rax,%r14,8), %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00] + vandnps 4660(%rax,%r14,8), %zmm21, %zmm17 + +// CHECK: vandnps (%rcx){1to16}, %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x09] + vandnps (%rcx){1to16}, %zmm21, %zmm17 + +// CHECK: vandnps 8128(%rdx), %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x4a,0x7f] + vandnps 8128(%rdx), %zmm21, %zmm17 + +// CHECK: vandnps 8192(%rdx), %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x8a,0x00,0x20,0x00,0x00] + vandnps 8192(%rdx), %zmm21, %zmm17 + +// CHECK: vandnps -8192(%rdx), %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x4a,0x80] + vandnps -8192(%rdx), %zmm21, %zmm17 + +// CHECK: vandnps -8256(%rdx), %zmm21, %zmm17 +// CHECK: encoding: 
[0x62,0xe1,0x54,0x40,0x55,0x8a,0xc0,0xdf,0xff,0xff] + vandnps -8256(%rdx), %zmm21, %zmm17 + +// CHECK: vandnps 508(%rdx){1to16}, %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x4a,0x7f] + vandnps 508(%rdx){1to16}, %zmm21, %zmm17 + +// CHECK: vandnps 512(%rdx){1to16}, %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x8a,0x00,0x02,0x00,0x00] + vandnps 512(%rdx){1to16}, %zmm21, %zmm17 + +// CHECK: vandnps -512(%rdx){1to16}, %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x4a,0x80] + vandnps -512(%rdx){1to16}, %zmm21, %zmm17 + +// CHECK: vandnps -516(%rdx){1to16}, %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x8a,0xfc,0xfd,0xff,0xff] + vandnps -516(%rdx){1to16}, %zmm21, %zmm17 + +// CHECK: vorpd %zmm24, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0x81,0x9d,0x40,0x56,0xd0] + vorpd %zmm24, %zmm28, %zmm18 + +// CHECK: vorpd %zmm24, %zmm28, %zmm18 {%k1} +// CHECK: encoding: [0x62,0x81,0x9d,0x41,0x56,0xd0] + vorpd %zmm24, %zmm28, %zmm18 {%k1} + +// CHECK: vorpd %zmm24, %zmm28, %zmm18 {%k1} {z} +// CHECK: encoding: [0x62,0x81,0x9d,0xc1,0x56,0xd0] + vorpd %zmm24, %zmm28, %zmm18 {%k1} {z} + +// CHECK: vorpd (%rcx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x11] + vorpd (%rcx), %zmm28, %zmm18 + +// CHECK: vorpd 4660(%rax,%r14,8), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x56,0x94,0xf0,0x34,0x12,0x00,0x00] + vorpd 4660(%rax,%r14,8), %zmm28, %zmm18 + +// CHECK: vorpd (%rcx){1to8}, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x11] + vorpd (%rcx){1to8}, %zmm28, %zmm18 + +// CHECK: vorpd 8128(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x52,0x7f] + vorpd 8128(%rdx), %zmm28, %zmm18 + +// CHECK: vorpd 8192(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x92,0x00,0x20,0x00,0x00] + vorpd 8192(%rdx), %zmm28, %zmm18 + +// CHECK: vorpd -8192(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x52,0x80] + vorpd 
-8192(%rdx), %zmm28, %zmm18 + +// CHECK: vorpd -8256(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x92,0xc0,0xdf,0xff,0xff] + vorpd -8256(%rdx), %zmm28, %zmm18 + +// CHECK: vorpd 1016(%rdx){1to8}, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x52,0x7f] + vorpd 1016(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vorpd 1024(%rdx){1to8}, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x92,0x00,0x04,0x00,0x00] + vorpd 1024(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vorpd -1024(%rdx){1to8}, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x52,0x80] + vorpd -1024(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vorpd -1032(%rdx){1to8}, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x92,0xf8,0xfb,0xff,0xff] + vorpd -1032(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vorps %zmm23, %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x21,0x74,0x40,0x56,0xe7] + vorps %zmm23, %zmm17, %zmm28 + +// CHECK: vorps %zmm23, %zmm17, %zmm28 {%k7} +// CHECK: encoding: [0x62,0x21,0x74,0x47,0x56,0xe7] + vorps %zmm23, %zmm17, %zmm28 {%k7} + +// CHECK: vorps %zmm23, %zmm17, %zmm28 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x74,0xc7,0x56,0xe7] + vorps %zmm23, %zmm17, %zmm28 {%k7} {z} + +// CHECK: vorps (%rcx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x21] + vorps (%rcx), %zmm17, %zmm28 + +// CHECK: vorps 4660(%rax,%r14,8), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x21,0x74,0x40,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00] + vorps 4660(%rax,%r14,8), %zmm17, %zmm28 + +// CHECK: vorps (%rcx){1to16}, %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x21] + vorps (%rcx){1to16}, %zmm17, %zmm28 + +// CHECK: vorps 8128(%rdx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x62,0x7f] + vorps 8128(%rdx), %zmm17, %zmm28 + +// CHECK: vorps 8192(%rdx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0xa2,0x00,0x20,0x00,0x00] + vorps 8192(%rdx), %zmm17, %zmm28 + +// CHECK: vorps -8192(%rdx), 
%zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x62,0x80] + vorps -8192(%rdx), %zmm17, %zmm28 + +// CHECK: vorps -8256(%rdx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0xa2,0xc0,0xdf,0xff,0xff] + vorps -8256(%rdx), %zmm17, %zmm28 + +// CHECK: vorps 508(%rdx){1to16}, %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x62,0x7f] + vorps 508(%rdx){1to16}, %zmm17, %zmm28 + +// CHECK: vorps 512(%rdx){1to16}, %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0xa2,0x00,0x02,0x00,0x00] + vorps 512(%rdx){1to16}, %zmm17, %zmm28 + +// CHECK: vorps -512(%rdx){1to16}, %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x62,0x80] + vorps -512(%rdx){1to16}, %zmm17, %zmm28 + +// CHECK: vorps -516(%rdx){1to16}, %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0xa2,0xfc,0xfd,0xff,0xff] + vorps -516(%rdx){1to16}, %zmm17, %zmm28 + +// CHECK: vxorpd %zmm27, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x01,0xed,0x40,0x57,0xe3] + vxorpd %zmm27, %zmm18, %zmm28 + +// CHECK: vxorpd %zmm27, %zmm18, %zmm28 {%k4} +// CHECK: encoding: [0x62,0x01,0xed,0x44,0x57,0xe3] + vxorpd %zmm27, %zmm18, %zmm28 {%k4} + +// CHECK: vxorpd %zmm27, %zmm18, %zmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x01,0xed,0xc4,0x57,0xe3] + vxorpd %zmm27, %zmm18, %zmm28 {%k4} {z} + +// CHECK: vxorpd (%rcx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x21] + vxorpd (%rcx), %zmm18, %zmm28 + +// CHECK: vxorpd 4660(%rax,%r14,8), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x21,0xed,0x40,0x57,0xa4,0xf0,0x34,0x12,0x00,0x00] + vxorpd 4660(%rax,%r14,8), %zmm18, %zmm28 + +// CHECK: vxorpd (%rcx){1to8}, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x21] + vxorpd (%rcx){1to8}, %zmm18, %zmm28 + +// CHECK: vxorpd 8128(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x62,0x7f] + vxorpd 8128(%rdx), %zmm18, %zmm28 + +// CHECK: vxorpd 8192(%rdx), %zmm18, %zmm28 +// CHECK: encoding: 
[0x62,0x61,0xed,0x40,0x57,0xa2,0x00,0x20,0x00,0x00] + vxorpd 8192(%rdx), %zmm18, %zmm28 + +// CHECK: vxorpd -8192(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x62,0x80] + vxorpd -8192(%rdx), %zmm18, %zmm28 + +// CHECK: vxorpd -8256(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0xa2,0xc0,0xdf,0xff,0xff] + vxorpd -8256(%rdx), %zmm18, %zmm28 + +// CHECK: vxorpd 1016(%rdx){1to8}, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x62,0x7f] + vxorpd 1016(%rdx){1to8}, %zmm18, %zmm28 + +// CHECK: vxorpd 1024(%rdx){1to8}, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0xa2,0x00,0x04,0x00,0x00] + vxorpd 1024(%rdx){1to8}, %zmm18, %zmm28 + +// CHECK: vxorpd -1024(%rdx){1to8}, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x62,0x80] + vxorpd -1024(%rdx){1to8}, %zmm18, %zmm28 + +// CHECK: vxorpd -1032(%rdx){1to8}, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0xa2,0xf8,0xfb,0xff,0xff] + vxorpd -1032(%rdx){1to8}, %zmm18, %zmm28 + +// CHECK: vxorps %zmm18, %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x21,0x1c,0x40,0x57,0xc2] + vxorps %zmm18, %zmm28, %zmm24 + +// CHECK: vxorps %zmm18, %zmm28, %zmm24 {%k4} +// CHECK: encoding: [0x62,0x21,0x1c,0x44,0x57,0xc2] + vxorps %zmm18, %zmm28, %zmm24 {%k4} + +// CHECK: vxorps %zmm18, %zmm28, %zmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x1c,0xc4,0x57,0xc2] + vxorps %zmm18, %zmm28, %zmm24 {%k4} {z} + +// CHECK: vxorps (%rcx), %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x01] + vxorps (%rcx), %zmm28, %zmm24 + +// CHECK: vxorps 4660(%rax,%r14,8), %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x21,0x1c,0x40,0x57,0x84,0xf0,0x34,0x12,0x00,0x00] + vxorps 4660(%rax,%r14,8), %zmm28, %zmm24 + +// CHECK: vxorps (%rcx){1to16}, %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x01] + vxorps (%rcx){1to16}, %zmm28, %zmm24 + +// CHECK: vxorps 8128(%rdx), %zmm28, %zmm24 +// CHECK: encoding: 
[0x62,0x61,0x1c,0x40,0x57,0x42,0x7f] + vxorps 8128(%rdx), %zmm28, %zmm24 + +// CHECK: vxorps 8192(%rdx), %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x82,0x00,0x20,0x00,0x00] + vxorps 8192(%rdx), %zmm28, %zmm24 + +// CHECK: vxorps -8192(%rdx), %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x42,0x80] + vxorps -8192(%rdx), %zmm28, %zmm24 + +// CHECK: vxorps -8256(%rdx), %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x82,0xc0,0xdf,0xff,0xff] + vxorps -8256(%rdx), %zmm28, %zmm24 + +// CHECK: vxorps 508(%rdx){1to16}, %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x42,0x7f] + vxorps 508(%rdx){1to16}, %zmm28, %zmm24 + +// CHECK: vxorps 512(%rdx){1to16}, %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x82,0x00,0x02,0x00,0x00] + vxorps 512(%rdx){1to16}, %zmm28, %zmm24 + +// CHECK: vxorps -512(%rdx){1to16}, %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x42,0x80] + vxorps -512(%rdx){1to16}, %zmm28, %zmm24 + +// CHECK: vxorps -516(%rdx){1to16}, %zmm28, %zmm24 +// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x82,0xfc,0xfd,0xff,0xff] + vxorps -516(%rdx){1to16}, %zmm28, %zmm24 diff --git a/test/MC/X86/x86-64-avx512dq_vl.s b/test/MC/X86/x86-64-avx512dq_vl.s index 38aab78be2c..81607ce1169 100644 --- a/test/MC/X86/x86-64-avx512dq_vl.s +++ b/test/MC/X86/x86-64-avx512dq_vl.s @@ -111,3 +111,1796 @@ // CHECK: vpmullq -1032(%rdx){1to4}, %ymm25, %ymm25 // CHECK: encoding: [0x62,0x62,0xb5,0x30,0x40,0x8a,0xf8,0xfb,0xff,0xff] vpmullq -1032(%rdx){1to4}, %ymm25, %ymm25 + +// CHECK: vandpd %xmm20, %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x95,0x00,0x54,0xec] + vandpd %xmm20, %xmm29, %xmm21 + +// CHECK: vandpd %xmm20, %xmm29, %xmm21 {%k6} +// CHECK: encoding: [0x62,0xa1,0x95,0x06,0x54,0xec] + vandpd %xmm20, %xmm29, %xmm21 {%k6} + +// CHECK: vandpd %xmm20, %xmm29, %xmm21 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x95,0x86,0x54,0xec] + vandpd %xmm20, %xmm29, %xmm21 {%k6} {z} + +// CHECK: vandpd 
(%rcx), %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0x29] + vandpd (%rcx), %xmm29, %xmm21 + +// CHECK: vandpd 291(%rax,%r14,8), %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x95,0x00,0x54,0xac,0xf0,0x23,0x01,0x00,0x00] + vandpd 291(%rax,%r14,8), %xmm29, %xmm21 + +// CHECK: vandpd (%rcx){1to2}, %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0x29] + vandpd (%rcx){1to2}, %xmm29, %xmm21 + +// CHECK: vandpd 2032(%rdx), %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0x6a,0x7f] + vandpd 2032(%rdx), %xmm29, %xmm21 + +// CHECK: vandpd 2048(%rdx), %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0xaa,0x00,0x08,0x00,0x00] + vandpd 2048(%rdx), %xmm29, %xmm21 + +// CHECK: vandpd -2048(%rdx), %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0x6a,0x80] + vandpd -2048(%rdx), %xmm29, %xmm21 + +// CHECK: vandpd -2064(%rdx), %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0xaa,0xf0,0xf7,0xff,0xff] + vandpd -2064(%rdx), %xmm29, %xmm21 + +// CHECK: vandpd 1016(%rdx){1to2}, %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0x6a,0x7f] + vandpd 1016(%rdx){1to2}, %xmm29, %xmm21 + +// CHECK: vandpd 1024(%rdx){1to2}, %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0xaa,0x00,0x04,0x00,0x00] + vandpd 1024(%rdx){1to2}, %xmm29, %xmm21 + +// CHECK: vandpd -1024(%rdx){1to2}, %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0x6a,0x80] + vandpd -1024(%rdx){1to2}, %xmm29, %xmm21 + +// CHECK: vandpd -1032(%rdx){1to2}, %xmm29, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0xaa,0xf8,0xfb,0xff,0xff] + vandpd -1032(%rdx){1to2}, %xmm29, %xmm21 + +// CHECK: vandpd %ymm28, %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x01,0xd5,0x20,0x54,0xe4] + vandpd %ymm28, %ymm21, %ymm28 + +// CHECK: vandpd %ymm28, %ymm21, %ymm28 {%k4} +// CHECK: encoding: [0x62,0x01,0xd5,0x24,0x54,0xe4] + vandpd %ymm28, %ymm21, %ymm28 {%k4} + +// CHECK: vandpd %ymm28, %ymm21, %ymm28 {%k4} {z} +// CHECK: 
encoding: [0x62,0x01,0xd5,0xa4,0x54,0xe4] + vandpd %ymm28, %ymm21, %ymm28 {%k4} {z} + +// CHECK: vandpd (%rcx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0x21] + vandpd (%rcx), %ymm21, %ymm28 + +// CHECK: vandpd 291(%rax,%r14,8), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x21,0xd5,0x20,0x54,0xa4,0xf0,0x23,0x01,0x00,0x00] + vandpd 291(%rax,%r14,8), %ymm21, %ymm28 + +// CHECK: vandpd (%rcx){1to4}, %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0x21] + vandpd (%rcx){1to4}, %ymm21, %ymm28 + +// CHECK: vandpd 4064(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0x62,0x7f] + vandpd 4064(%rdx), %ymm21, %ymm28 + +// CHECK: vandpd 4096(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0xa2,0x00,0x10,0x00,0x00] + vandpd 4096(%rdx), %ymm21, %ymm28 + +// CHECK: vandpd -4096(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0x62,0x80] + vandpd -4096(%rdx), %ymm21, %ymm28 + +// CHECK: vandpd -4128(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0xa2,0xe0,0xef,0xff,0xff] + vandpd -4128(%rdx), %ymm21, %ymm28 + +// CHECK: vandpd 1016(%rdx){1to4}, %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0x62,0x7f] + vandpd 1016(%rdx){1to4}, %ymm21, %ymm28 + +// CHECK: vandpd 1024(%rdx){1to4}, %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0xa2,0x00,0x04,0x00,0x00] + vandpd 1024(%rdx){1to4}, %ymm21, %ymm28 + +// CHECK: vandpd -1024(%rdx){1to4}, %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0x62,0x80] + vandpd -1024(%rdx){1to4}, %ymm21, %ymm28 + +// CHECK: vandpd -1032(%rdx){1to4}, %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0xa2,0xf8,0xfb,0xff,0xff] + vandpd -1032(%rdx){1to4}, %ymm21, %ymm28 + +// CHECK: vandps %xmm24, %xmm21, %xmm23 +// CHECK: encoding: [0x62,0x81,0x54,0x00,0x54,0xf8] + vandps %xmm24, %xmm21, %xmm23 + +// CHECK: vandps %xmm24, %xmm21, %xmm23 {%k5} +// CHECK: encoding: [0x62,0x81,0x54,0x05,0x54,0xf8] 
+ vandps %xmm24, %xmm21, %xmm23 {%k5} + +// CHECK: vandps %xmm24, %xmm21, %xmm23 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x54,0x85,0x54,0xf8] + vandps %xmm24, %xmm21, %xmm23 {%k5} {z} + +// CHECK: vandps (%rcx), %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0x39] + vandps (%rcx), %xmm21, %xmm23 + +// CHECK: vandps 291(%rax,%r14,8), %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x54,0xbc,0xf0,0x23,0x01,0x00,0x00] + vandps 291(%rax,%r14,8), %xmm21, %xmm23 + +// CHECK: vandps (%rcx){1to4}, %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0x39] + vandps (%rcx){1to4}, %xmm21, %xmm23 + +// CHECK: vandps 2032(%rdx), %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0x7a,0x7f] + vandps 2032(%rdx), %xmm21, %xmm23 + +// CHECK: vandps 2048(%rdx), %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0xba,0x00,0x08,0x00,0x00] + vandps 2048(%rdx), %xmm21, %xmm23 + +// CHECK: vandps -2048(%rdx), %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0x7a,0x80] + vandps -2048(%rdx), %xmm21, %xmm23 + +// CHECK: vandps -2064(%rdx), %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0xba,0xf0,0xf7,0xff,0xff] + vandps -2064(%rdx), %xmm21, %xmm23 + +// CHECK: vandps 508(%rdx){1to4}, %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0x7a,0x7f] + vandps 508(%rdx){1to4}, %xmm21, %xmm23 + +// CHECK: vandps 512(%rdx){1to4}, %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0xba,0x00,0x02,0x00,0x00] + vandps 512(%rdx){1to4}, %xmm21, %xmm23 + +// CHECK: vandps -512(%rdx){1to4}, %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0x7a,0x80] + vandps -512(%rdx){1to4}, %xmm21, %xmm23 + +// CHECK: vandps -516(%rdx){1to4}, %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0xba,0xfc,0xfd,0xff,0xff] + vandps -516(%rdx){1to4}, %xmm21, %xmm23 + +// CHECK: vandps %ymm23, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x54,0xd7] + vandps %ymm23, %ymm18, %ymm26 + +// 
CHECK: vandps %ymm23, %ymm18, %ymm26 {%k6} +// CHECK: encoding: [0x62,0x21,0x6c,0x26,0x54,0xd7] + vandps %ymm23, %ymm18, %ymm26 {%k6} + +// CHECK: vandps %ymm23, %ymm18, %ymm26 {%k6} {z} +// CHECK: encoding: [0x62,0x21,0x6c,0xa6,0x54,0xd7] + vandps %ymm23, %ymm18, %ymm26 {%k6} {z} + +// CHECK: vandps (%rcx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x11] + vandps (%rcx), %ymm18, %ymm26 + +// CHECK: vandps 291(%rax,%r14,8), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x54,0x94,0xf0,0x23,0x01,0x00,0x00] + vandps 291(%rax,%r14,8), %ymm18, %ymm26 + +// CHECK: vandps (%rcx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x11] + vandps (%rcx){1to8}, %ymm18, %ymm26 + +// CHECK: vandps 4064(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x52,0x7f] + vandps 4064(%rdx), %ymm18, %ymm26 + +// CHECK: vandps 4096(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x92,0x00,0x10,0x00,0x00] + vandps 4096(%rdx), %ymm18, %ymm26 + +// CHECK: vandps -4096(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x52,0x80] + vandps -4096(%rdx), %ymm18, %ymm26 + +// CHECK: vandps -4128(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x92,0xe0,0xef,0xff,0xff] + vandps -4128(%rdx), %ymm18, %ymm26 + +// CHECK: vandps 508(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x52,0x7f] + vandps 508(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vandps 512(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x92,0x00,0x02,0x00,0x00] + vandps 512(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vandps -512(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x52,0x80] + vandps -512(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vandps -516(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x92,0xfc,0xfd,0xff,0xff] + vandps -516(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vandnpd %xmm25, %xmm27, 
%xmm25 +// CHECK: encoding: [0x62,0x01,0xa5,0x00,0x55,0xc9] + vandnpd %xmm25, %xmm27, %xmm25 + +// CHECK: vandnpd %xmm25, %xmm27, %xmm25 {%k5} +// CHECK: encoding: [0x62,0x01,0xa5,0x05,0x55,0xc9] + vandnpd %xmm25, %xmm27, %xmm25 {%k5} + +// CHECK: vandnpd %xmm25, %xmm27, %xmm25 {%k5} {z} +// CHECK: encoding: [0x62,0x01,0xa5,0x85,0x55,0xc9] + vandnpd %xmm25, %xmm27, %xmm25 {%k5} {z} + +// CHECK: vandnpd (%rcx), %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x09] + vandnpd (%rcx), %xmm27, %xmm25 + +// CHECK: vandnpd 291(%rax,%r14,8), %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x21,0xa5,0x00,0x55,0x8c,0xf0,0x23,0x01,0x00,0x00] + vandnpd 291(%rax,%r14,8), %xmm27, %xmm25 + +// CHECK: vandnpd (%rcx){1to2}, %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x09] + vandnpd (%rcx){1to2}, %xmm27, %xmm25 + +// CHECK: vandnpd 2032(%rdx), %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x4a,0x7f] + vandnpd 2032(%rdx), %xmm27, %xmm25 + +// CHECK: vandnpd 2048(%rdx), %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x8a,0x00,0x08,0x00,0x00] + vandnpd 2048(%rdx), %xmm27, %xmm25 + +// CHECK: vandnpd -2048(%rdx), %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x4a,0x80] + vandnpd -2048(%rdx), %xmm27, %xmm25 + +// CHECK: vandnpd -2064(%rdx), %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x8a,0xf0,0xf7,0xff,0xff] + vandnpd -2064(%rdx), %xmm27, %xmm25 + +// CHECK: vandnpd 1016(%rdx){1to2}, %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x4a,0x7f] + vandnpd 1016(%rdx){1to2}, %xmm27, %xmm25 + +// CHECK: vandnpd 1024(%rdx){1to2}, %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x8a,0x00,0x04,0x00,0x00] + vandnpd 1024(%rdx){1to2}, %xmm27, %xmm25 + +// CHECK: vandnpd -1024(%rdx){1to2}, %xmm27, %xmm25 +// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x4a,0x80] + vandnpd -1024(%rdx){1to2}, %xmm27, %xmm25 + +// CHECK: vandnpd -1032(%rdx){1to2}, %xmm27, %xmm25 +// CHECK: encoding: 
[0x62,0x61,0xa5,0x10,0x55,0x8a,0xf8,0xfb,0xff,0xff] + vandnpd -1032(%rdx){1to2}, %xmm27, %xmm25 + +// CHECK: vandnpd %ymm22, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xa1,0xed,0x20,0x55,0xf6] + vandnpd %ymm22, %ymm18, %ymm22 + +// CHECK: vandnpd %ymm22, %ymm18, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa1,0xed,0x27,0x55,0xf6] + vandnpd %ymm22, %ymm18, %ymm22 {%k7} + +// CHECK: vandnpd %ymm22, %ymm18, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0xed,0xa7,0x55,0xf6] + vandnpd %ymm22, %ymm18, %ymm22 {%k7} {z} + +// CHECK: vandnpd (%rcx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0x31] + vandnpd (%rcx), %ymm18, %ymm22 + +// CHECK: vandnpd 291(%rax,%r14,8), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xa1,0xed,0x20,0x55,0xb4,0xf0,0x23,0x01,0x00,0x00] + vandnpd 291(%rax,%r14,8), %ymm18, %ymm22 + +// CHECK: vandnpd (%rcx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0x31] + vandnpd (%rcx){1to4}, %ymm18, %ymm22 + +// CHECK: vandnpd 4064(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0x72,0x7f] + vandnpd 4064(%rdx), %ymm18, %ymm22 + +// CHECK: vandnpd 4096(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0xb2,0x00,0x10,0x00,0x00] + vandnpd 4096(%rdx), %ymm18, %ymm22 + +// CHECK: vandnpd -4096(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0x72,0x80] + vandnpd -4096(%rdx), %ymm18, %ymm22 + +// CHECK: vandnpd -4128(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0xb2,0xe0,0xef,0xff,0xff] + vandnpd -4128(%rdx), %ymm18, %ymm22 + +// CHECK: vandnpd 1016(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0x72,0x7f] + vandnpd 1016(%rdx){1to4}, %ymm18, %ymm22 + +// CHECK: vandnpd 1024(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0xb2,0x00,0x04,0x00,0x00] + vandnpd 1024(%rdx){1to4}, %ymm18, %ymm22 + +// CHECK: vandnpd -1024(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: 
[0x62,0xe1,0xed,0x30,0x55,0x72,0x80] + vandnpd -1024(%rdx){1to4}, %ymm18, %ymm22 + +// CHECK: vandnpd -1032(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0xb2,0xf8,0xfb,0xff,0xff] + vandnpd -1032(%rdx){1to4}, %ymm18, %ymm22 + +// CHECK: vandnps %xmm27, %xmm21, %xmm21 +// CHECK: encoding: [0x62,0x81,0x54,0x00,0x55,0xeb] + vandnps %xmm27, %xmm21, %xmm21 + +// CHECK: vandnps %xmm27, %xmm21, %xmm21 {%k2} +// CHECK: encoding: [0x62,0x81,0x54,0x02,0x55,0xeb] + vandnps %xmm27, %xmm21, %xmm21 {%k2} + +// CHECK: vandnps %xmm27, %xmm21, %xmm21 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x54,0x82,0x55,0xeb] + vandnps %xmm27, %xmm21, %xmm21 {%k2} {z} + +// CHECK: vandnps (%rcx), %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0x29] + vandnps (%rcx), %xmm21, %xmm21 + +// CHECK: vandnps 291(%rax,%r14,8), %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x55,0xac,0xf0,0x23,0x01,0x00,0x00] + vandnps 291(%rax,%r14,8), %xmm21, %xmm21 + +// CHECK: vandnps (%rcx){1to4}, %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0x29] + vandnps (%rcx){1to4}, %xmm21, %xmm21 + +// CHECK: vandnps 2032(%rdx), %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0x6a,0x7f] + vandnps 2032(%rdx), %xmm21, %xmm21 + +// CHECK: vandnps 2048(%rdx), %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0xaa,0x00,0x08,0x00,0x00] + vandnps 2048(%rdx), %xmm21, %xmm21 + +// CHECK: vandnps -2048(%rdx), %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0x6a,0x80] + vandnps -2048(%rdx), %xmm21, %xmm21 + +// CHECK: vandnps -2064(%rdx), %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0xaa,0xf0,0xf7,0xff,0xff] + vandnps -2064(%rdx), %xmm21, %xmm21 + +// CHECK: vandnps 508(%rdx){1to4}, %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0x6a,0x7f] + vandnps 508(%rdx){1to4}, %xmm21, %xmm21 + +// CHECK: vandnps 512(%rdx){1to4}, %xmm21, %xmm21 +// CHECK: encoding: 
[0x62,0xe1,0x54,0x10,0x55,0xaa,0x00,0x02,0x00,0x00] + vandnps 512(%rdx){1to4}, %xmm21, %xmm21 + +// CHECK: vandnps -512(%rdx){1to4}, %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0x6a,0x80] + vandnps -512(%rdx){1to4}, %xmm21, %xmm21 + +// CHECK: vandnps -516(%rdx){1to4}, %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0xaa,0xfc,0xfd,0xff,0xff] + vandnps -516(%rdx){1to4}, %xmm21, %xmm21 + +// CHECK: vandnps %ymm25, %ymm23, %ymm19 +// CHECK: encoding: [0x62,0x81,0x44,0x20,0x55,0xd9] + vandnps %ymm25, %ymm23, %ymm19 + +// CHECK: vandnps %ymm25, %ymm23, %ymm19 {%k1} +// CHECK: encoding: [0x62,0x81,0x44,0x21,0x55,0xd9] + vandnps %ymm25, %ymm23, %ymm19 {%k1} + +// CHECK: vandnps %ymm25, %ymm23, %ymm19 {%k1} {z} +// CHECK: encoding: [0x62,0x81,0x44,0xa1,0x55,0xd9] + vandnps %ymm25, %ymm23, %ymm19 {%k1} {z} + +// CHECK: vandnps (%rcx), %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x19] + vandnps (%rcx), %ymm23, %ymm19 + +// CHECK: vandnps 291(%rax,%r14,8), %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x44,0x20,0x55,0x9c,0xf0,0x23,0x01,0x00,0x00] + vandnps 291(%rax,%r14,8), %ymm23, %ymm19 + +// CHECK: vandnps (%rcx){1to8}, %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x19] + vandnps (%rcx){1to8}, %ymm23, %ymm19 + +// CHECK: vandnps 4064(%rdx), %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x5a,0x7f] + vandnps 4064(%rdx), %ymm23, %ymm19 + +// CHECK: vandnps 4096(%rdx), %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x9a,0x00,0x10,0x00,0x00] + vandnps 4096(%rdx), %ymm23, %ymm19 + +// CHECK: vandnps -4096(%rdx), %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x5a,0x80] + vandnps -4096(%rdx), %ymm23, %ymm19 + +// CHECK: vandnps -4128(%rdx), %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x9a,0xe0,0xef,0xff,0xff] + vandnps -4128(%rdx), %ymm23, %ymm19 + +// CHECK: vandnps 508(%rdx){1to8}, %ymm23, %ymm19 +// CHECK: encoding: 
[0x62,0xe1,0x44,0x30,0x55,0x5a,0x7f] + vandnps 508(%rdx){1to8}, %ymm23, %ymm19 + +// CHECK: vandnps 512(%rdx){1to8}, %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x9a,0x00,0x02,0x00,0x00] + vandnps 512(%rdx){1to8}, %ymm23, %ymm19 + +// CHECK: vandnps -512(%rdx){1to8}, %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x5a,0x80] + vandnps -512(%rdx){1to8}, %ymm23, %ymm19 + +// CHECK: vandnps -516(%rdx){1to8}, %ymm23, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x9a,0xfc,0xfd,0xff,0xff] + vandnps -516(%rdx){1to8}, %ymm23, %ymm19 + +// CHECK: vorpd %xmm18, %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x56,0xfa] + vorpd %xmm18, %xmm27, %xmm23 + +// CHECK: vorpd %xmm18, %xmm27, %xmm23 {%k1} +// CHECK: encoding: [0x62,0xa1,0xa5,0x01,0x56,0xfa] + vorpd %xmm18, %xmm27, %xmm23 {%k1} + +// CHECK: vorpd %xmm18, %xmm27, %xmm23 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0xa5,0x81,0x56,0xfa] + vorpd %xmm18, %xmm27, %xmm23 {%k1} {z} + +// CHECK: vorpd (%rcx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0x39] + vorpd (%rcx), %xmm27, %xmm23 + +// CHECK: vorpd 291(%rax,%r14,8), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x56,0xbc,0xf0,0x23,0x01,0x00,0x00] + vorpd 291(%rax,%r14,8), %xmm27, %xmm23 + +// CHECK: vorpd (%rcx){1to2}, %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0x39] + vorpd (%rcx){1to2}, %xmm27, %xmm23 + +// CHECK: vorpd 2032(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0x7a,0x7f] + vorpd 2032(%rdx), %xmm27, %xmm23 + +// CHECK: vorpd 2048(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0xba,0x00,0x08,0x00,0x00] + vorpd 2048(%rdx), %xmm27, %xmm23 + +// CHECK: vorpd -2048(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0x7a,0x80] + vorpd -2048(%rdx), %xmm27, %xmm23 + +// CHECK: vorpd -2064(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0xba,0xf0,0xf7,0xff,0xff] + vorpd -2064(%rdx), %xmm27, 
%xmm23 + +// CHECK: vorpd 1016(%rdx){1to2}, %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0x7a,0x7f] + vorpd 1016(%rdx){1to2}, %xmm27, %xmm23 + +// CHECK: vorpd 1024(%rdx){1to2}, %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0xba,0x00,0x04,0x00,0x00] + vorpd 1024(%rdx){1to2}, %xmm27, %xmm23 + +// CHECK: vorpd -1024(%rdx){1to2}, %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0x7a,0x80] + vorpd -1024(%rdx){1to2}, %xmm27, %xmm23 + +// CHECK: vorpd -1032(%rdx){1to2}, %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0xba,0xf8,0xfb,0xff,0xff] + vorpd -1032(%rdx){1to2}, %xmm27, %xmm23 + +// CHECK: vorpd %ymm20, %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x21,0x95,0x20,0x56,0xd4] + vorpd %ymm20, %ymm29, %ymm26 + +// CHECK: vorpd %ymm20, %ymm29, %ymm26 {%k5} +// CHECK: encoding: [0x62,0x21,0x95,0x25,0x56,0xd4] + vorpd %ymm20, %ymm29, %ymm26 {%k5} + +// CHECK: vorpd %ymm20, %ymm29, %ymm26 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0x95,0xa5,0x56,0xd4] + vorpd %ymm20, %ymm29, %ymm26 {%k5} {z} + +// CHECK: vorpd (%rcx), %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x11] + vorpd (%rcx), %ymm29, %ymm26 + +// CHECK: vorpd 291(%rax,%r14,8), %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x21,0x95,0x20,0x56,0x94,0xf0,0x23,0x01,0x00,0x00] + vorpd 291(%rax,%r14,8), %ymm29, %ymm26 + +// CHECK: vorpd (%rcx){1to4}, %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x11] + vorpd (%rcx){1to4}, %ymm29, %ymm26 + +// CHECK: vorpd 4064(%rdx), %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x52,0x7f] + vorpd 4064(%rdx), %ymm29, %ymm26 + +// CHECK: vorpd 4096(%rdx), %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x92,0x00,0x10,0x00,0x00] + vorpd 4096(%rdx), %ymm29, %ymm26 + +// CHECK: vorpd -4096(%rdx), %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x52,0x80] + vorpd -4096(%rdx), %ymm29, %ymm26 + +// CHECK: vorpd -4128(%rdx), %ymm29, %ymm26 +// CHECK: encoding: 
[0x62,0x61,0x95,0x20,0x56,0x92,0xe0,0xef,0xff,0xff] + vorpd -4128(%rdx), %ymm29, %ymm26 + +// CHECK: vorpd 1016(%rdx){1to4}, %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x52,0x7f] + vorpd 1016(%rdx){1to4}, %ymm29, %ymm26 + +// CHECK: vorpd 1024(%rdx){1to4}, %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x92,0x00,0x04,0x00,0x00] + vorpd 1024(%rdx){1to4}, %ymm29, %ymm26 + +// CHECK: vorpd -1024(%rdx){1to4}, %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x52,0x80] + vorpd -1024(%rdx){1to4}, %ymm29, %ymm26 + +// CHECK: vorpd -1032(%rdx){1to4}, %ymm29, %ymm26 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x92,0xf8,0xfb,0xff,0xff] + vorpd -1032(%rdx){1to4}, %ymm29, %ymm26 + +// CHECK: vorps %xmm27, %xmm28, %xmm19 +// CHECK: encoding: [0x62,0x81,0x1c,0x00,0x56,0xdb] + vorps %xmm27, %xmm28, %xmm19 + +// CHECK: vorps %xmm27, %xmm28, %xmm19 {%k4} +// CHECK: encoding: [0x62,0x81,0x1c,0x04,0x56,0xdb] + vorps %xmm27, %xmm28, %xmm19 {%k4} + +// CHECK: vorps %xmm27, %xmm28, %xmm19 {%k4} {z} +// CHECK: encoding: [0x62,0x81,0x1c,0x84,0x56,0xdb] + vorps %xmm27, %xmm28, %xmm19 {%k4} {z} + +// CHECK: vorps (%rcx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x19] + vorps (%rcx), %xmm28, %xmm19 + +// CHECK: vorps 291(%rax,%r14,8), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xa1,0x1c,0x00,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00] + vorps 291(%rax,%r14,8), %xmm28, %xmm19 + +// CHECK: vorps (%rcx){1to4}, %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x19] + vorps (%rcx){1to4}, %xmm28, %xmm19 + +// CHECK: vorps 2032(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x5a,0x7f] + vorps 2032(%rdx), %xmm28, %xmm19 + +// CHECK: vorps 2048(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x9a,0x00,0x08,0x00,0x00] + vorps 2048(%rdx), %xmm28, %xmm19 + +// CHECK: vorps -2048(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x5a,0x80] + vorps -2048(%rdx), %xmm28, 
%xmm19 + +// CHECK: vorps -2064(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x9a,0xf0,0xf7,0xff,0xff] + vorps -2064(%rdx), %xmm28, %xmm19 + +// CHECK: vorps 508(%rdx){1to4}, %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x5a,0x7f] + vorps 508(%rdx){1to4}, %xmm28, %xmm19 + +// CHECK: vorps 512(%rdx){1to4}, %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x9a,0x00,0x02,0x00,0x00] + vorps 512(%rdx){1to4}, %xmm28, %xmm19 + +// CHECK: vorps -512(%rdx){1to4}, %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x5a,0x80] + vorps -512(%rdx){1to4}, %xmm28, %xmm19 + +// CHECK: vorps -516(%rdx){1to4}, %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x9a,0xfc,0xfd,0xff,0xff] + vorps -516(%rdx){1to4}, %xmm28, %xmm19 + +// CHECK: vorps %ymm26, %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x01,0x2c,0x20,0x56,0xda] + vorps %ymm26, %ymm26, %ymm27 + +// CHECK: vorps %ymm26, %ymm26, %ymm27 {%k1} +// CHECK: encoding: [0x62,0x01,0x2c,0x21,0x56,0xda] + vorps %ymm26, %ymm26, %ymm27 {%k1} + +// CHECK: vorps %ymm26, %ymm26, %ymm27 {%k1} {z} +// CHECK: encoding: [0x62,0x01,0x2c,0xa1,0x56,0xda] + vorps %ymm26, %ymm26, %ymm27 {%k1} {z} + +// CHECK: vorps (%rcx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x19] + vorps (%rcx), %ymm26, %ymm27 + +// CHECK: vorps 291(%rax,%r14,8), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x21,0x2c,0x20,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00] + vorps 291(%rax,%r14,8), %ymm26, %ymm27 + +// CHECK: vorps (%rcx){1to8}, %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x19] + vorps (%rcx){1to8}, %ymm26, %ymm27 + +// CHECK: vorps 4064(%rdx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x5a,0x7f] + vorps 4064(%rdx), %ymm26, %ymm27 + +// CHECK: vorps 4096(%rdx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x9a,0x00,0x10,0x00,0x00] + vorps 4096(%rdx), %ymm26, %ymm27 + +// CHECK: vorps -4096(%rdx), %ymm26, %ymm27 +// CHECK: encoding: 
[0x62,0x61,0x2c,0x20,0x56,0x5a,0x80] + vorps -4096(%rdx), %ymm26, %ymm27 + +// CHECK: vorps -4128(%rdx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x9a,0xe0,0xef,0xff,0xff] + vorps -4128(%rdx), %ymm26, %ymm27 + +// CHECK: vorps 508(%rdx){1to8}, %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x5a,0x7f] + vorps 508(%rdx){1to8}, %ymm26, %ymm27 + +// CHECK: vorps 512(%rdx){1to8}, %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x9a,0x00,0x02,0x00,0x00] + vorps 512(%rdx){1to8}, %ymm26, %ymm27 + +// CHECK: vorps -512(%rdx){1to8}, %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x5a,0x80] + vorps -512(%rdx){1to8}, %ymm26, %ymm27 + +// CHECK: vorps -516(%rdx){1to8}, %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x9a,0xfc,0xfd,0xff,0xff] + vorps -516(%rdx){1to8}, %ymm26, %ymm27 + +// CHECK: vxorpd %xmm23, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0xd7] + vxorpd %xmm23, %xmm21, %xmm18 + +// CHECK: vxorpd %xmm23, %xmm21, %xmm18 {%k2} +// CHECK: encoding: [0x62,0xa1,0xd5,0x02,0x57,0xd7] + vxorpd %xmm23, %xmm21, %xmm18 {%k2} + +// CHECK: vxorpd %xmm23, %xmm21, %xmm18 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0xd5,0x82,0x57,0xd7] + vxorpd %xmm23, %xmm21, %xmm18 {%k2} {z} + +// CHECK: vxorpd (%rcx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x11] + vxorpd (%rcx), %xmm21, %xmm18 + +// CHECK: vxorpd 291(%rax,%r14,8), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0x94,0xf0,0x23,0x01,0x00,0x00] + vxorpd 291(%rax,%r14,8), %xmm21, %xmm18 + +// CHECK: vxorpd (%rcx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x11] + vxorpd (%rcx){1to2}, %xmm21, %xmm18 + +// CHECK: vxorpd 2032(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x52,0x7f] + vxorpd 2032(%rdx), %xmm21, %xmm18 + +// CHECK: vxorpd 2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x92,0x00,0x08,0x00,0x00] + vxorpd 2048(%rdx), 
%xmm21, %xmm18 + +// CHECK: vxorpd -2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x52,0x80] + vxorpd -2048(%rdx), %xmm21, %xmm18 + +// CHECK: vxorpd -2064(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x92,0xf0,0xf7,0xff,0xff] + vxorpd -2064(%rdx), %xmm21, %xmm18 + +// CHECK: vxorpd 1016(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x52,0x7f] + vxorpd 1016(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vxorpd 1024(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x92,0x00,0x04,0x00,0x00] + vxorpd 1024(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vxorpd -1024(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x52,0x80] + vxorpd -1024(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vxorpd -1032(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x92,0xf8,0xfb,0xff,0xff] + vxorpd -1032(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vxorpd %ymm19, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x21,0x95,0x20,0x57,0xc3] + vxorpd %ymm19, %ymm29, %ymm24 + +// CHECK: vxorpd %ymm19, %ymm29, %ymm24 {%k7} +// CHECK: encoding: [0x62,0x21,0x95,0x27,0x57,0xc3] + vxorpd %ymm19, %ymm29, %ymm24 {%k7} + +// CHECK: vxorpd %ymm19, %ymm29, %ymm24 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x95,0xa7,0x57,0xc3] + vxorpd %ymm19, %ymm29, %ymm24 {%k7} {z} + +// CHECK: vxorpd (%rcx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x01] + vxorpd (%rcx), %ymm29, %ymm24 + +// CHECK: vxorpd 291(%rax,%r14,8), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x21,0x95,0x20,0x57,0x84,0xf0,0x23,0x01,0x00,0x00] + vxorpd 291(%rax,%r14,8), %ymm29, %ymm24 + +// CHECK: vxorpd (%rcx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x01] + vxorpd (%rcx){1to4}, %ymm29, %ymm24 + +// CHECK: vxorpd 4064(%rdx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x42,0x7f] + vxorpd 4064(%rdx), %ymm29, %ymm24 + +// CHECK: vxorpd 4096(%rdx), %ymm29, 
%ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x82,0x00,0x10,0x00,0x00] + vxorpd 4096(%rdx), %ymm29, %ymm24 + +// CHECK: vxorpd -4096(%rdx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x42,0x80] + vxorpd -4096(%rdx), %ymm29, %ymm24 + +// CHECK: vxorpd -4128(%rdx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x82,0xe0,0xef,0xff,0xff] + vxorpd -4128(%rdx), %ymm29, %ymm24 + +// CHECK: vxorpd 1016(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x42,0x7f] + vxorpd 1016(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vxorpd 1024(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x82,0x00,0x04,0x00,0x00] + vxorpd 1024(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vxorpd -1024(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x42,0x80] + vxorpd -1024(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vxorpd -1032(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x82,0xf8,0xfb,0xff,0xff] + vxorpd -1032(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vxorps %xmm19, %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x6c,0x00,0x57,0xe3] + vxorps %xmm19, %xmm18, %xmm20 + +// CHECK: vxorps %xmm19, %xmm18, %xmm20 {%k1} +// CHECK: encoding: [0x62,0xa1,0x6c,0x01,0x57,0xe3] + vxorps %xmm19, %xmm18, %xmm20 {%k1} + +// CHECK: vxorps %xmm19, %xmm18, %xmm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x6c,0x81,0x57,0xe3] + vxorps %xmm19, %xmm18, %xmm20 {%k1} {z} + +// CHECK: vxorps (%rcx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0x21] + vxorps (%rcx), %xmm18, %xmm20 + +// CHECK: vxorps 291(%rax,%r14,8), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x6c,0x00,0x57,0xa4,0xf0,0x23,0x01,0x00,0x00] + vxorps 291(%rax,%r14,8), %xmm18, %xmm20 + +// CHECK: vxorps (%rcx){1to4}, %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0x21] + vxorps (%rcx){1to4}, %xmm18, %xmm20 + +// CHECK: vxorps 2032(%rdx), %xmm18, %xmm20 +// CHECK: encoding: 
[0x62,0xe1,0x6c,0x00,0x57,0x62,0x7f] + vxorps 2032(%rdx), %xmm18, %xmm20 + +// CHECK: vxorps 2048(%rdx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0xa2,0x00,0x08,0x00,0x00] + vxorps 2048(%rdx), %xmm18, %xmm20 + +// CHECK: vxorps -2048(%rdx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0x62,0x80] + vxorps -2048(%rdx), %xmm18, %xmm20 + +// CHECK: vxorps -2064(%rdx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0xa2,0xf0,0xf7,0xff,0xff] + vxorps -2064(%rdx), %xmm18, %xmm20 + +// CHECK: vxorps 508(%rdx){1to4}, %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0x62,0x7f] + vxorps 508(%rdx){1to4}, %xmm18, %xmm20 + +// CHECK: vxorps 512(%rdx){1to4}, %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0xa2,0x00,0x02,0x00,0x00] + vxorps 512(%rdx){1to4}, %xmm18, %xmm20 + +// CHECK: vxorps -512(%rdx){1to4}, %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0x62,0x80] + vxorps -512(%rdx){1to4}, %xmm18, %xmm20 + +// CHECK: vxorps -516(%rdx){1to4}, %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0xa2,0xfc,0xfd,0xff,0xff] + vxorps -516(%rdx){1to4}, %xmm18, %xmm20 + +// CHECK: vxorps %ymm24, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x01,0x5c,0x20,0x57,0xd8] + vxorps %ymm24, %ymm20, %ymm27 + +// CHECK: vxorps %ymm24, %ymm20, %ymm27 {%k2} +// CHECK: encoding: [0x62,0x01,0x5c,0x22,0x57,0xd8] + vxorps %ymm24, %ymm20, %ymm27 {%k2} + +// CHECK: vxorps %ymm24, %ymm20, %ymm27 {%k2} {z} +// CHECK: encoding: [0x62,0x01,0x5c,0xa2,0x57,0xd8] + vxorps %ymm24, %ymm20, %ymm27 {%k2} {z} + +// CHECK: vxorps (%rcx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x19] + vxorps (%rcx), %ymm20, %ymm27 + +// CHECK: vxorps 291(%rax,%r14,8), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x21,0x5c,0x20,0x57,0x9c,0xf0,0x23,0x01,0x00,0x00] + vxorps 291(%rax,%r14,8), %ymm20, %ymm27 + +// CHECK: vxorps (%rcx){1to8}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x19] + vxorps 
(%rcx){1to8}, %ymm20, %ymm27 + +// CHECK: vxorps 4064(%rdx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x5a,0x7f] + vxorps 4064(%rdx), %ymm20, %ymm27 + +// CHECK: vxorps 4096(%rdx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x9a,0x00,0x10,0x00,0x00] + vxorps 4096(%rdx), %ymm20, %ymm27 + +// CHECK: vxorps -4096(%rdx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x5a,0x80] + vxorps -4096(%rdx), %ymm20, %ymm27 + +// CHECK: vxorps -4128(%rdx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x9a,0xe0,0xef,0xff,0xff] + vxorps -4128(%rdx), %ymm20, %ymm27 + +// CHECK: vxorps 508(%rdx){1to8}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x5a,0x7f] + vxorps 508(%rdx){1to8}, %ymm20, %ymm27 + +// CHECK: vxorps 512(%rdx){1to8}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x9a,0x00,0x02,0x00,0x00] + vxorps 512(%rdx){1to8}, %ymm20, %ymm27 + +// CHECK: vxorps -512(%rdx){1to8}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x5a,0x80] + vxorps -512(%rdx){1to8}, %ymm20, %ymm27 + +// CHECK: vxorps -516(%rdx){1to8}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x9a,0xfc,0xfd,0xff,0xff] + vxorps -516(%rdx){1to8}, %ymm20, %ymm27 + +// CHECK: vandpd %xmm27, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0x81,0xb5,0x00,0x54,0xdb] + vandpd %xmm27, %xmm25, %xmm19 + +// CHECK: vandpd %xmm27, %xmm25, %xmm19 {%k6} +// CHECK: encoding: [0x62,0x81,0xb5,0x06,0x54,0xdb] + vandpd %xmm27, %xmm25, %xmm19 {%k6} + +// CHECK: vandpd %xmm27, %xmm25, %xmm19 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0xb5,0x86,0x54,0xdb] + vandpd %xmm27, %xmm25, %xmm19 {%k6} {z} + +// CHECK: vandpd (%rcx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x19] + vandpd (%rcx), %xmm25, %xmm19 + +// CHECK: vandpd 4660(%rax,%r14,8), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xa1,0xb5,0x00,0x54,0x9c,0xf0,0x34,0x12,0x00,0x00] + vandpd 4660(%rax,%r14,8), %xmm25, %xmm19 + +// CHECK: 
vandpd (%rcx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x19] + vandpd (%rcx){1to2}, %xmm25, %xmm19 + +// CHECK: vandpd 2032(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x5a,0x7f] + vandpd 2032(%rdx), %xmm25, %xmm19 + +// CHECK: vandpd 2048(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x9a,0x00,0x08,0x00,0x00] + vandpd 2048(%rdx), %xmm25, %xmm19 + +// CHECK: vandpd -2048(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x5a,0x80] + vandpd -2048(%rdx), %xmm25, %xmm19 + +// CHECK: vandpd -2064(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x9a,0xf0,0xf7,0xff,0xff] + vandpd -2064(%rdx), %xmm25, %xmm19 + +// CHECK: vandpd 1016(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x5a,0x7f] + vandpd 1016(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vandpd 1024(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x9a,0x00,0x04,0x00,0x00] + vandpd 1024(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vandpd -1024(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x5a,0x80] + vandpd -1024(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vandpd -1032(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x9a,0xf8,0xfb,0xff,0xff] + vandpd -1032(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vandpd %ymm21, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x21,0x95,0x20,0x54,0xc5] + vandpd %ymm21, %ymm29, %ymm24 + +// CHECK: vandpd %ymm21, %ymm29, %ymm24 {%k2} +// CHECK: encoding: [0x62,0x21,0x95,0x22,0x54,0xc5] + vandpd %ymm21, %ymm29, %ymm24 {%k2} + +// CHECK: vandpd %ymm21, %ymm29, %ymm24 {%k2} {z} +// CHECK: encoding: [0x62,0x21,0x95,0xa2,0x54,0xc5] + vandpd %ymm21, %ymm29, %ymm24 {%k2} {z} + +// CHECK: vandpd (%rcx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x01] + vandpd (%rcx), %ymm29, %ymm24 + +// CHECK: vandpd 4660(%rax,%r14,8), %ymm29, %ymm24 +// CHECK: encoding: 
[0x62,0x21,0x95,0x20,0x54,0x84,0xf0,0x34,0x12,0x00,0x00] + vandpd 4660(%rax,%r14,8), %ymm29, %ymm24 + +// CHECK: vandpd (%rcx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x01] + vandpd (%rcx){1to4}, %ymm29, %ymm24 + +// CHECK: vandpd 4064(%rdx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x42,0x7f] + vandpd 4064(%rdx), %ymm29, %ymm24 + +// CHECK: vandpd 4096(%rdx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x82,0x00,0x10,0x00,0x00] + vandpd 4096(%rdx), %ymm29, %ymm24 + +// CHECK: vandpd -4096(%rdx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x42,0x80] + vandpd -4096(%rdx), %ymm29, %ymm24 + +// CHECK: vandpd -4128(%rdx), %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x82,0xe0,0xef,0xff,0xff] + vandpd -4128(%rdx), %ymm29, %ymm24 + +// CHECK: vandpd 1016(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x42,0x7f] + vandpd 1016(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vandpd 1024(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x82,0x00,0x04,0x00,0x00] + vandpd 1024(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vandpd -1024(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x42,0x80] + vandpd -1024(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vandpd -1032(%rdx){1to4}, %ymm29, %ymm24 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x82,0xf8,0xfb,0xff,0xff] + vandpd -1032(%rdx){1to4}, %ymm29, %ymm24 + +// CHECK: vandps %xmm17, %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xa1,0x34,0x00,0x54,0xf1] + vandps %xmm17, %xmm25, %xmm22 + +// CHECK: vandps %xmm17, %xmm25, %xmm22 {%k3} +// CHECK: encoding: [0x62,0xa1,0x34,0x03,0x54,0xf1] + vandps %xmm17, %xmm25, %xmm22 {%k3} + +// CHECK: vandps %xmm17, %xmm25, %xmm22 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x34,0x83,0x54,0xf1] + vandps %xmm17, %xmm25, %xmm22 {%k3} {z} + +// CHECK: vandps (%rcx), %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0x31] + 
vandps (%rcx), %xmm25, %xmm22 + +// CHECK: vandps 4660(%rax,%r14,8), %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xa1,0x34,0x00,0x54,0xb4,0xf0,0x34,0x12,0x00,0x00] + vandps 4660(%rax,%r14,8), %xmm25, %xmm22 + +// CHECK: vandps (%rcx){1to4}, %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0x31] + vandps (%rcx){1to4}, %xmm25, %xmm22 + +// CHECK: vandps 2032(%rdx), %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0x72,0x7f] + vandps 2032(%rdx), %xmm25, %xmm22 + +// CHECK: vandps 2048(%rdx), %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0xb2,0x00,0x08,0x00,0x00] + vandps 2048(%rdx), %xmm25, %xmm22 + +// CHECK: vandps -2048(%rdx), %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0x72,0x80] + vandps -2048(%rdx), %xmm25, %xmm22 + +// CHECK: vandps -2064(%rdx), %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0xb2,0xf0,0xf7,0xff,0xff] + vandps -2064(%rdx), %xmm25, %xmm22 + +// CHECK: vandps 508(%rdx){1to4}, %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0x72,0x7f] + vandps 508(%rdx){1to4}, %xmm25, %xmm22 + +// CHECK: vandps 512(%rdx){1to4}, %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0xb2,0x00,0x02,0x00,0x00] + vandps 512(%rdx){1to4}, %xmm25, %xmm22 + +// CHECK: vandps -512(%rdx){1to4}, %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0x72,0x80] + vandps -512(%rdx){1to4}, %xmm25, %xmm22 + +// CHECK: vandps -516(%rdx){1to4}, %xmm25, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0xb2,0xfc,0xfd,0xff,0xff] + vandps -516(%rdx){1to4}, %xmm25, %xmm22 + +// CHECK: vandps %ymm18, %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x34,0x20,0x54,0xf2] + vandps %ymm18, %ymm25, %ymm22 + +// CHECK: vandps %ymm18, %ymm25, %ymm22 {%k1} +// CHECK: encoding: [0x62,0xa1,0x34,0x21,0x54,0xf2] + vandps %ymm18, %ymm25, %ymm22 {%k1} + +// CHECK: vandps %ymm18, %ymm25, %ymm22 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x34,0xa1,0x54,0xf2] + vandps %ymm18, %ymm25, %ymm22 {%k1} {z} + 
+// CHECK: vandps (%rcx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0x31] + vandps (%rcx), %ymm25, %ymm22 + +// CHECK: vandps 4660(%rax,%r14,8), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x34,0x20,0x54,0xb4,0xf0,0x34,0x12,0x00,0x00] + vandps 4660(%rax,%r14,8), %ymm25, %ymm22 + +// CHECK: vandps (%rcx){1to8}, %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0x31] + vandps (%rcx){1to8}, %ymm25, %ymm22 + +// CHECK: vandps 4064(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0x72,0x7f] + vandps 4064(%rdx), %ymm25, %ymm22 + +// CHECK: vandps 4096(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0xb2,0x00,0x10,0x00,0x00] + vandps 4096(%rdx), %ymm25, %ymm22 + +// CHECK: vandps -4096(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0x72,0x80] + vandps -4096(%rdx), %ymm25, %ymm22 + +// CHECK: vandps -4128(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0xb2,0xe0,0xef,0xff,0xff] + vandps -4128(%rdx), %ymm25, %ymm22 + +// CHECK: vandps 508(%rdx){1to8}, %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0x72,0x7f] + vandps 508(%rdx){1to8}, %ymm25, %ymm22 + +// CHECK: vandps 512(%rdx){1to8}, %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0xb2,0x00,0x02,0x00,0x00] + vandps 512(%rdx){1to8}, %ymm25, %ymm22 + +// CHECK: vandps -512(%rdx){1to8}, %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0x72,0x80] + vandps -512(%rdx){1to8}, %ymm25, %ymm22 + +// CHECK: vandps -516(%rdx){1to8}, %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0xb2,0xfc,0xfd,0xff,0xff] + vandps -516(%rdx){1to8}, %ymm25, %ymm22 + +// CHECK: vandnpd %xmm23, %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xa1,0xed,0x00,0x55,0xdf] + vandnpd %xmm23, %xmm18, %xmm19 + +// CHECK: vandnpd %xmm23, %xmm18, %xmm19 {%k1} +// CHECK: encoding: [0x62,0xa1,0xed,0x01,0x55,0xdf] + vandnpd %xmm23, %xmm18, %xmm19 {%k1} + +// CHECK: vandnpd %xmm23, %xmm18, %xmm19 {%k1} 
{z} +// CHECK: encoding: [0x62,0xa1,0xed,0x81,0x55,0xdf] + vandnpd %xmm23, %xmm18, %xmm19 {%k1} {z} + +// CHECK: vandnpd (%rcx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x19] + vandnpd (%rcx), %xmm18, %xmm19 + +// CHECK: vandnpd 4660(%rax,%r14,8), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xa1,0xed,0x00,0x55,0x9c,0xf0,0x34,0x12,0x00,0x00] + vandnpd 4660(%rax,%r14,8), %xmm18, %xmm19 + +// CHECK: vandnpd (%rcx){1to2}, %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x19] + vandnpd (%rcx){1to2}, %xmm18, %xmm19 + +// CHECK: vandnpd 2032(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x5a,0x7f] + vandnpd 2032(%rdx), %xmm18, %xmm19 + +// CHECK: vandnpd 2048(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x9a,0x00,0x08,0x00,0x00] + vandnpd 2048(%rdx), %xmm18, %xmm19 + +// CHECK: vandnpd -2048(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x5a,0x80] + vandnpd -2048(%rdx), %xmm18, %xmm19 + +// CHECK: vandnpd -2064(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x9a,0xf0,0xf7,0xff,0xff] + vandnpd -2064(%rdx), %xmm18, %xmm19 + +// CHECK: vandnpd 1016(%rdx){1to2}, %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x5a,0x7f] + vandnpd 1016(%rdx){1to2}, %xmm18, %xmm19 + +// CHECK: vandnpd 1024(%rdx){1to2}, %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x9a,0x00,0x04,0x00,0x00] + vandnpd 1024(%rdx){1to2}, %xmm18, %xmm19 + +// CHECK: vandnpd -1024(%rdx){1to2}, %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x5a,0x80] + vandnpd -1024(%rdx){1to2}, %xmm18, %xmm19 + +// CHECK: vandnpd -1032(%rdx){1to2}, %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x9a,0xf8,0xfb,0xff,0xff] + vandnpd -1032(%rdx){1to2}, %xmm18, %xmm19 + +// CHECK: vandnpd %ymm28, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x01,0x95,0x20,0x55,0xcc] + vandnpd %ymm28, %ymm29, %ymm25 + +// CHECK: vandnpd %ymm28, %ymm29, %ymm25 {%k7} +// CHECK: 
encoding: [0x62,0x01,0x95,0x27,0x55,0xcc] + vandnpd %ymm28, %ymm29, %ymm25 {%k7} + +// CHECK: vandnpd %ymm28, %ymm29, %ymm25 {%k7} {z} +// CHECK: encoding: [0x62,0x01,0x95,0xa7,0x55,0xcc] + vandnpd %ymm28, %ymm29, %ymm25 {%k7} {z} + +// CHECK: vandnpd (%rcx), %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x09] + vandnpd (%rcx), %ymm29, %ymm25 + +// CHECK: vandnpd 4660(%rax,%r14,8), %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x21,0x95,0x20,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00] + vandnpd 4660(%rax,%r14,8), %ymm29, %ymm25 + +// CHECK: vandnpd (%rcx){1to4}, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x09] + vandnpd (%rcx){1to4}, %ymm29, %ymm25 + +// CHECK: vandnpd 4064(%rdx), %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x4a,0x7f] + vandnpd 4064(%rdx), %ymm29, %ymm25 + +// CHECK: vandnpd 4096(%rdx), %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x8a,0x00,0x10,0x00,0x00] + vandnpd 4096(%rdx), %ymm29, %ymm25 + +// CHECK: vandnpd -4096(%rdx), %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x4a,0x80] + vandnpd -4096(%rdx), %ymm29, %ymm25 + +// CHECK: vandnpd -4128(%rdx), %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x8a,0xe0,0xef,0xff,0xff] + vandnpd -4128(%rdx), %ymm29, %ymm25 + +// CHECK: vandnpd 1016(%rdx){1to4}, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x4a,0x7f] + vandnpd 1016(%rdx){1to4}, %ymm29, %ymm25 + +// CHECK: vandnpd 1024(%rdx){1to4}, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x8a,0x00,0x04,0x00,0x00] + vandnpd 1024(%rdx){1to4}, %ymm29, %ymm25 + +// CHECK: vandnpd -1024(%rdx){1to4}, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x4a,0x80] + vandnpd -1024(%rdx){1to4}, %ymm29, %ymm25 + +// CHECK: vandnpd -1032(%rdx){1to4}, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x8a,0xf8,0xfb,0xff,0xff] + vandnpd -1032(%rdx){1to4}, %ymm29, %ymm25 + +// CHECK: vandnps %xmm23, %xmm17, %xmm25 +// CHECK: 
encoding: [0x62,0x21,0x74,0x00,0x55,0xcf] + vandnps %xmm23, %xmm17, %xmm25 + +// CHECK: vandnps %xmm23, %xmm17, %xmm25 {%k5} +// CHECK: encoding: [0x62,0x21,0x74,0x05,0x55,0xcf] + vandnps %xmm23, %xmm17, %xmm25 {%k5} + +// CHECK: vandnps %xmm23, %xmm17, %xmm25 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0x74,0x85,0x55,0xcf] + vandnps %xmm23, %xmm17, %xmm25 {%k5} {z} + +// CHECK: vandnps (%rcx), %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x09] + vandnps (%rcx), %xmm17, %xmm25 + +// CHECK: vandnps 4660(%rax,%r14,8), %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x21,0x74,0x00,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00] + vandnps 4660(%rax,%r14,8), %xmm17, %xmm25 + +// CHECK: vandnps (%rcx){1to4}, %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x09] + vandnps (%rcx){1to4}, %xmm17, %xmm25 + +// CHECK: vandnps 2032(%rdx), %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x4a,0x7f] + vandnps 2032(%rdx), %xmm17, %xmm25 + +// CHECK: vandnps 2048(%rdx), %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x8a,0x00,0x08,0x00,0x00] + vandnps 2048(%rdx), %xmm17, %xmm25 + +// CHECK: vandnps -2048(%rdx), %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x4a,0x80] + vandnps -2048(%rdx), %xmm17, %xmm25 + +// CHECK: vandnps -2064(%rdx), %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x8a,0xf0,0xf7,0xff,0xff] + vandnps -2064(%rdx), %xmm17, %xmm25 + +// CHECK: vandnps 508(%rdx){1to4}, %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x4a,0x7f] + vandnps 508(%rdx){1to4}, %xmm17, %xmm25 + +// CHECK: vandnps 512(%rdx){1to4}, %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x8a,0x00,0x02,0x00,0x00] + vandnps 512(%rdx){1to4}, %xmm17, %xmm25 + +// CHECK: vandnps -512(%rdx){1to4}, %xmm17, %xmm25 +// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x4a,0x80] + vandnps -512(%rdx){1to4}, %xmm17, %xmm25 + +// CHECK: vandnps -516(%rdx){1to4}, %xmm17, %xmm25 +// CHECK: encoding: 
[0x62,0x61,0x74,0x10,0x55,0x8a,0xfc,0xfd,0xff,0xff] + vandnps -516(%rdx){1to4}, %xmm17, %xmm25 + +// CHECK: vandnps %ymm23, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x64,0x20,0x55,0xd7] + vandnps %ymm23, %ymm19, %ymm18 + +// CHECK: vandnps %ymm23, %ymm19, %ymm18 {%k6} +// CHECK: encoding: [0x62,0xa1,0x64,0x26,0x55,0xd7] + vandnps %ymm23, %ymm19, %ymm18 {%k6} + +// CHECK: vandnps %ymm23, %ymm19, %ymm18 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x64,0xa6,0x55,0xd7] + vandnps %ymm23, %ymm19, %ymm18 {%k6} {z} + +// CHECK: vandnps (%rcx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x11] + vandnps (%rcx), %ymm19, %ymm18 + +// CHECK: vandnps 4660(%rax,%r14,8), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x64,0x20,0x55,0x94,0xf0,0x34,0x12,0x00,0x00] + vandnps 4660(%rax,%r14,8), %ymm19, %ymm18 + +// CHECK: vandnps (%rcx){1to8}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x11] + vandnps (%rcx){1to8}, %ymm19, %ymm18 + +// CHECK: vandnps 4064(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x52,0x7f] + vandnps 4064(%rdx), %ymm19, %ymm18 + +// CHECK: vandnps 4096(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x92,0x00,0x10,0x00,0x00] + vandnps 4096(%rdx), %ymm19, %ymm18 + +// CHECK: vandnps -4096(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x52,0x80] + vandnps -4096(%rdx), %ymm19, %ymm18 + +// CHECK: vandnps -4128(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x92,0xe0,0xef,0xff,0xff] + vandnps -4128(%rdx), %ymm19, %ymm18 + +// CHECK: vandnps 508(%rdx){1to8}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x52,0x7f] + vandnps 508(%rdx){1to8}, %ymm19, %ymm18 + +// CHECK: vandnps 512(%rdx){1to8}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x92,0x00,0x02,0x00,0x00] + vandnps 512(%rdx){1to8}, %ymm19, %ymm18 + +// CHECK: vandnps -512(%rdx){1to8}, %ymm19, %ymm18 +// CHECK: encoding: 
[0x62,0xe1,0x64,0x30,0x55,0x52,0x80] + vandnps -512(%rdx){1to8}, %ymm19, %ymm18 + +// CHECK: vandnps -516(%rdx){1to8}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x92,0xfc,0xfd,0xff,0xff] + vandnps -516(%rdx){1to8}, %ymm19, %ymm18 + +// CHECK: vorpd %xmm18, %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x21,0x95,0x00,0x56,0xd2] + vorpd %xmm18, %xmm29, %xmm26 + +// CHECK: vorpd %xmm18, %xmm29, %xmm26 {%k2} +// CHECK: encoding: [0x62,0x21,0x95,0x02,0x56,0xd2] + vorpd %xmm18, %xmm29, %xmm26 {%k2} + +// CHECK: vorpd %xmm18, %xmm29, %xmm26 {%k2} {z} +// CHECK: encoding: [0x62,0x21,0x95,0x82,0x56,0xd2] + vorpd %xmm18, %xmm29, %xmm26 {%k2} {z} + +// CHECK: vorpd (%rcx), %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x11] + vorpd (%rcx), %xmm29, %xmm26 + +// CHECK: vorpd 4660(%rax,%r14,8), %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x21,0x95,0x00,0x56,0x94,0xf0,0x34,0x12,0x00,0x00] + vorpd 4660(%rax,%r14,8), %xmm29, %xmm26 + +// CHECK: vorpd (%rcx){1to2}, %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x11] + vorpd (%rcx){1to2}, %xmm29, %xmm26 + +// CHECK: vorpd 2032(%rdx), %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x52,0x7f] + vorpd 2032(%rdx), %xmm29, %xmm26 + +// CHECK: vorpd 2048(%rdx), %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x92,0x00,0x08,0x00,0x00] + vorpd 2048(%rdx), %xmm29, %xmm26 + +// CHECK: vorpd -2048(%rdx), %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x52,0x80] + vorpd -2048(%rdx), %xmm29, %xmm26 + +// CHECK: vorpd -2064(%rdx), %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x92,0xf0,0xf7,0xff,0xff] + vorpd -2064(%rdx), %xmm29, %xmm26 + +// CHECK: vorpd 1016(%rdx){1to2}, %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x52,0x7f] + vorpd 1016(%rdx){1to2}, %xmm29, %xmm26 + +// CHECK: vorpd 1024(%rdx){1to2}, %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x92,0x00,0x04,0x00,0x00] + vorpd 1024(%rdx){1to2}, %xmm29, 
%xmm26 + +// CHECK: vorpd -1024(%rdx){1to2}, %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x52,0x80] + vorpd -1024(%rdx){1to2}, %xmm29, %xmm26 + +// CHECK: vorpd -1032(%rdx){1to2}, %xmm29, %xmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x92,0xf8,0xfb,0xff,0xff] + vorpd -1032(%rdx){1to2}, %xmm29, %xmm26 + +// CHECK: vorpd %ymm22, %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x21,0xe5,0x20,0x56,0xe6] + vorpd %ymm22, %ymm19, %ymm28 + +// CHECK: vorpd %ymm22, %ymm19, %ymm28 {%k1} +// CHECK: encoding: [0x62,0x21,0xe5,0x21,0x56,0xe6] + vorpd %ymm22, %ymm19, %ymm28 {%k1} + +// CHECK: vorpd %ymm22, %ymm19, %ymm28 {%k1} {z} +// CHECK: encoding: [0x62,0x21,0xe5,0xa1,0x56,0xe6] + vorpd %ymm22, %ymm19, %ymm28 {%k1} {z} + +// CHECK: vorpd (%rcx), %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0x21] + vorpd (%rcx), %ymm19, %ymm28 + +// CHECK: vorpd 4660(%rax,%r14,8), %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x21,0xe5,0x20,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00] + vorpd 4660(%rax,%r14,8), %ymm19, %ymm28 + +// CHECK: vorpd (%rcx){1to4}, %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0x21] + vorpd (%rcx){1to4}, %ymm19, %ymm28 + +// CHECK: vorpd 4064(%rdx), %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0x62,0x7f] + vorpd 4064(%rdx), %ymm19, %ymm28 + +// CHECK: vorpd 4096(%rdx), %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0xa2,0x00,0x10,0x00,0x00] + vorpd 4096(%rdx), %ymm19, %ymm28 + +// CHECK: vorpd -4096(%rdx), %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0x62,0x80] + vorpd -4096(%rdx), %ymm19, %ymm28 + +// CHECK: vorpd -4128(%rdx), %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0xa2,0xe0,0xef,0xff,0xff] + vorpd -4128(%rdx), %ymm19, %ymm28 + +// CHECK: vorpd 1016(%rdx){1to4}, %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0x62,0x7f] + vorpd 1016(%rdx){1to4}, %ymm19, %ymm28 + +// CHECK: vorpd 1024(%rdx){1to4}, %ymm19, %ymm28 +// CHECK: encoding: 
[0x62,0x61,0xe5,0x30,0x56,0xa2,0x00,0x04,0x00,0x00] + vorpd 1024(%rdx){1to4}, %ymm19, %ymm28 + +// CHECK: vorpd -1024(%rdx){1to4}, %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0x62,0x80] + vorpd -1024(%rdx){1to4}, %ymm19, %ymm28 + +// CHECK: vorpd -1032(%rdx){1to4}, %ymm19, %ymm28 +// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0xa2,0xf8,0xfb,0xff,0xff] + vorpd -1032(%rdx){1to4}, %ymm19, %ymm28 + +// CHECK: vorps %xmm24, %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x01,0x4c,0x00,0x56,0xe0] + vorps %xmm24, %xmm22, %xmm28 + +// CHECK: vorps %xmm24, %xmm22, %xmm28 {%k6} +// CHECK: encoding: [0x62,0x01,0x4c,0x06,0x56,0xe0] + vorps %xmm24, %xmm22, %xmm28 {%k6} + +// CHECK: vorps %xmm24, %xmm22, %xmm28 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0x4c,0x86,0x56,0xe0] + vorps %xmm24, %xmm22, %xmm28 {%k6} {z} + +// CHECK: vorps (%rcx), %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0x21] + vorps (%rcx), %xmm22, %xmm28 + +// CHECK: vorps 4660(%rax,%r14,8), %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x21,0x4c,0x00,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00] + vorps 4660(%rax,%r14,8), %xmm22, %xmm28 + +// CHECK: vorps (%rcx){1to4}, %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0x21] + vorps (%rcx){1to4}, %xmm22, %xmm28 + +// CHECK: vorps 2032(%rdx), %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0x62,0x7f] + vorps 2032(%rdx), %xmm22, %xmm28 + +// CHECK: vorps 2048(%rdx), %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0xa2,0x00,0x08,0x00,0x00] + vorps 2048(%rdx), %xmm22, %xmm28 + +// CHECK: vorps -2048(%rdx), %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0x62,0x80] + vorps -2048(%rdx), %xmm22, %xmm28 + +// CHECK: vorps -2064(%rdx), %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0xa2,0xf0,0xf7,0xff,0xff] + vorps -2064(%rdx), %xmm22, %xmm28 + +// CHECK: vorps 508(%rdx){1to4}, %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0x62,0x7f] + vorps 508(%rdx){1to4}, %xmm22, %xmm28 
+ +// CHECK: vorps 512(%rdx){1to4}, %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0xa2,0x00,0x02,0x00,0x00] + vorps 512(%rdx){1to4}, %xmm22, %xmm28 + +// CHECK: vorps -512(%rdx){1to4}, %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0x62,0x80] + vorps -512(%rdx){1to4}, %xmm22, %xmm28 + +// CHECK: vorps -516(%rdx){1to4}, %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0xa2,0xfc,0xfd,0xff,0xff] + vorps -516(%rdx){1to4}, %xmm22, %xmm28 + +// CHECK: vorps %ymm25, %ymm24, %ymm20 +// CHECK: encoding: [0x62,0x81,0x3c,0x20,0x56,0xe1] + vorps %ymm25, %ymm24, %ymm20 + +// CHECK: vorps %ymm25, %ymm24, %ymm20 {%k1} +// CHECK: encoding: [0x62,0x81,0x3c,0x21,0x56,0xe1] + vorps %ymm25, %ymm24, %ymm20 {%k1} + +// CHECK: vorps %ymm25, %ymm24, %ymm20 {%k1} {z} +// CHECK: encoding: [0x62,0x81,0x3c,0xa1,0x56,0xe1] + vorps %ymm25, %ymm24, %ymm20 {%k1} {z} + +// CHECK: vorps (%rcx), %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0x21] + vorps (%rcx), %ymm24, %ymm20 + +// CHECK: vorps 4660(%rax,%r14,8), %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x3c,0x20,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00] + vorps 4660(%rax,%r14,8), %ymm24, %ymm20 + +// CHECK: vorps (%rcx){1to8}, %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0x21] + vorps (%rcx){1to8}, %ymm24, %ymm20 + +// CHECK: vorps 4064(%rdx), %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0x62,0x7f] + vorps 4064(%rdx), %ymm24, %ymm20 + +// CHECK: vorps 4096(%rdx), %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0xa2,0x00,0x10,0x00,0x00] + vorps 4096(%rdx), %ymm24, %ymm20 + +// CHECK: vorps -4096(%rdx), %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0x62,0x80] + vorps -4096(%rdx), %ymm24, %ymm20 + +// CHECK: vorps -4128(%rdx), %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0xa2,0xe0,0xef,0xff,0xff] + vorps -4128(%rdx), %ymm24, %ymm20 + +// CHECK: vorps 508(%rdx){1to8}, %ymm24, %ymm20 +// CHECK: encoding: 
[0x62,0xe1,0x3c,0x30,0x56,0x62,0x7f] + vorps 508(%rdx){1to8}, %ymm24, %ymm20 + +// CHECK: vorps 512(%rdx){1to8}, %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0xa2,0x00,0x02,0x00,0x00] + vorps 512(%rdx){1to8}, %ymm24, %ymm20 + +// CHECK: vorps -512(%rdx){1to8}, %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0x62,0x80] + vorps -512(%rdx){1to8}, %ymm24, %ymm20 + +// CHECK: vorps -516(%rdx){1to8}, %ymm24, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0xa2,0xfc,0xfd,0xff,0xff] + vorps -516(%rdx){1to8}, %ymm24, %ymm20 + +// CHECK: vxorpd %xmm18, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0xf2] + vxorpd %xmm18, %xmm21, %xmm22 + +// CHECK: vxorpd %xmm18, %xmm21, %xmm22 {%k3} +// CHECK: encoding: [0x62,0xa1,0xd5,0x03,0x57,0xf2] + vxorpd %xmm18, %xmm21, %xmm22 {%k3} + +// CHECK: vxorpd %xmm18, %xmm21, %xmm22 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0xd5,0x83,0x57,0xf2] + vxorpd %xmm18, %xmm21, %xmm22 {%k3} {z} + +// CHECK: vxorpd (%rcx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x31] + vxorpd (%rcx), %xmm21, %xmm22 + +// CHECK: vxorpd 4660(%rax,%r14,8), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0xb4,0xf0,0x34,0x12,0x00,0x00] + vxorpd 4660(%rax,%r14,8), %xmm21, %xmm22 + +// CHECK: vxorpd (%rcx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x31] + vxorpd (%rcx){1to2}, %xmm21, %xmm22 + +// CHECK: vxorpd 2032(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x72,0x7f] + vxorpd 2032(%rdx), %xmm21, %xmm22 + +// CHECK: vxorpd 2048(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0xb2,0x00,0x08,0x00,0x00] + vxorpd 2048(%rdx), %xmm21, %xmm22 + +// CHECK: vxorpd -2048(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x72,0x80] + vxorpd -2048(%rdx), %xmm21, %xmm22 + +// CHECK: vxorpd -2064(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0xb2,0xf0,0xf7,0xff,0xff] + vxorpd -2064(%rdx), 
%xmm21, %xmm22 + +// CHECK: vxorpd 1016(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x72,0x7f] + vxorpd 1016(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: vxorpd 1024(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0xb2,0x00,0x04,0x00,0x00] + vxorpd 1024(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: vxorpd -1024(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x72,0x80] + vxorpd -1024(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: vxorpd -1032(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0xb2,0xf8,0xfb,0xff,0xff] + vxorpd -1032(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: vxorpd %ymm27, %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x01,0xd5,0x20,0x57,0xcb] + vxorpd %ymm27, %ymm21, %ymm25 + +// CHECK: vxorpd %ymm27, %ymm21, %ymm25 {%k7} +// CHECK: encoding: [0x62,0x01,0xd5,0x27,0x57,0xcb] + vxorpd %ymm27, %ymm21, %ymm25 {%k7} + +// CHECK: vxorpd %ymm27, %ymm21, %ymm25 {%k7} {z} +// CHECK: encoding: [0x62,0x01,0xd5,0xa7,0x57,0xcb] + vxorpd %ymm27, %ymm21, %ymm25 {%k7} {z} + +// CHECK: vxorpd (%rcx), %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x09] + vxorpd (%rcx), %ymm21, %ymm25 + +// CHECK: vxorpd 4660(%rax,%r14,8), %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x21,0xd5,0x20,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00] + vxorpd 4660(%rax,%r14,8), %ymm21, %ymm25 + +// CHECK: vxorpd (%rcx){1to4}, %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x09] + vxorpd (%rcx){1to4}, %ymm21, %ymm25 + +// CHECK: vxorpd 4064(%rdx), %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x4a,0x7f] + vxorpd 4064(%rdx), %ymm21, %ymm25 + +// CHECK: vxorpd 4096(%rdx), %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x8a,0x00,0x10,0x00,0x00] + vxorpd 4096(%rdx), %ymm21, %ymm25 + +// CHECK: vxorpd -4096(%rdx), %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x4a,0x80] + vxorpd -4096(%rdx), %ymm21, %ymm25 + +// CHECK: vxorpd -4128(%rdx), 
%ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x8a,0xe0,0xef,0xff,0xff] + vxorpd -4128(%rdx), %ymm21, %ymm25 + +// CHECK: vxorpd 1016(%rdx){1to4}, %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x4a,0x7f] + vxorpd 1016(%rdx){1to4}, %ymm21, %ymm25 + +// CHECK: vxorpd 1024(%rdx){1to4}, %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x8a,0x00,0x04,0x00,0x00] + vxorpd 1024(%rdx){1to4}, %ymm21, %ymm25 + +// CHECK: vxorpd -1024(%rdx){1to4}, %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x4a,0x80] + vxorpd -1024(%rdx){1to4}, %ymm21, %ymm25 + +// CHECK: vxorpd -1032(%rdx){1to4}, %ymm21, %ymm25 +// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x8a,0xf8,0xfb,0xff,0xff] + vxorpd -1032(%rdx){1to4}, %ymm21, %ymm25 + +// CHECK: vxorps %xmm21, %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x57,0xcd] + vxorps %xmm21, %xmm21, %xmm17 + +// CHECK: vxorps %xmm21, %xmm21, %xmm17 {%k5} +// CHECK: encoding: [0x62,0xa1,0x54,0x05,0x57,0xcd] + vxorps %xmm21, %xmm21, %xmm17 {%k5} + +// CHECK: vxorps %xmm21, %xmm21, %xmm17 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x54,0x85,0x57,0xcd] + vxorps %xmm21, %xmm21, %xmm17 {%k5} {z} + +// CHECK: vxorps (%rcx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x09] + vxorps (%rcx), %xmm21, %xmm17 + +// CHECK: vxorps 4660(%rax,%r14,8), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00] + vxorps 4660(%rax,%r14,8), %xmm21, %xmm17 + +// CHECK: vxorps (%rcx){1to4}, %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x09] + vxorps (%rcx){1to4}, %xmm21, %xmm17 + +// CHECK: vxorps 2032(%rdx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x4a,0x7f] + vxorps 2032(%rdx), %xmm21, %xmm17 + +// CHECK: vxorps 2048(%rdx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x8a,0x00,0x08,0x00,0x00] + vxorps 2048(%rdx), %xmm21, %xmm17 + +// CHECK: vxorps -2048(%rdx), %xmm21, %xmm17 +// CHECK: encoding: 
[0x62,0xe1,0x54,0x00,0x57,0x4a,0x80] + vxorps -2048(%rdx), %xmm21, %xmm17 + +// CHECK: vxorps -2064(%rdx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x8a,0xf0,0xf7,0xff,0xff] + vxorps -2064(%rdx), %xmm21, %xmm17 + +// CHECK: vxorps 508(%rdx){1to4}, %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x4a,0x7f] + vxorps 508(%rdx){1to4}, %xmm21, %xmm17 + +// CHECK: vxorps 512(%rdx){1to4}, %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x8a,0x00,0x02,0x00,0x00] + vxorps 512(%rdx){1to4}, %xmm21, %xmm17 + +// CHECK: vxorps -512(%rdx){1to4}, %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x4a,0x80] + vxorps -512(%rdx){1to4}, %xmm21, %xmm17 + +// CHECK: vxorps -516(%rdx){1to4}, %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x8a,0xfc,0xfd,0xff,0xff] + vxorps -516(%rdx){1to4}, %xmm21, %xmm17 + +// CHECK: vxorps %ymm22, %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x21,0x34,0x20,0x57,0xe6] + vxorps %ymm22, %ymm25, %ymm28 + +// CHECK: vxorps %ymm22, %ymm25, %ymm28 {%k3} +// CHECK: encoding: [0x62,0x21,0x34,0x23,0x57,0xe6] + vxorps %ymm22, %ymm25, %ymm28 {%k3} + +// CHECK: vxorps %ymm22, %ymm25, %ymm28 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0x34,0xa3,0x57,0xe6] + vxorps %ymm22, %ymm25, %ymm28 {%k3} {z} + +// CHECK: vxorps (%rcx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0x21] + vxorps (%rcx), %ymm25, %ymm28 + +// CHECK: vxorps 4660(%rax,%r14,8), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x21,0x34,0x20,0x57,0xa4,0xf0,0x34,0x12,0x00,0x00] + vxorps 4660(%rax,%r14,8), %ymm25, %ymm28 + +// CHECK: vxorps (%rcx){1to8}, %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0x21] + vxorps (%rcx){1to8}, %ymm25, %ymm28 + +// CHECK: vxorps 4064(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0x62,0x7f] + vxorps 4064(%rdx), %ymm25, %ymm28 + +// CHECK: vxorps 4096(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0xa2,0x00,0x10,0x00,0x00] + vxorps 
4096(%rdx), %ymm25, %ymm28 + +// CHECK: vxorps -4096(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0x62,0x80] + vxorps -4096(%rdx), %ymm25, %ymm28 + +// CHECK: vxorps -4128(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0xa2,0xe0,0xef,0xff,0xff] + vxorps -4128(%rdx), %ymm25, %ymm28 + +// CHECK: vxorps 508(%rdx){1to8}, %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0x62,0x7f] + vxorps 508(%rdx){1to8}, %ymm25, %ymm28 + +// CHECK: vxorps 512(%rdx){1to8}, %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0xa2,0x00,0x02,0x00,0x00] + vxorps 512(%rdx){1to8}, %ymm25, %ymm28 + +// CHECK: vxorps -512(%rdx){1to8}, %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0x62,0x80] + vxorps -512(%rdx){1to8}, %ymm25, %ymm28 + +// CHECK: vxorps -516(%rdx){1to8}, %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0xa2,0xfc,0xfd,0xff,0xff] + vxorps -516(%rdx){1to8}, %ymm25, %ymm28 + diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index 09386b0bd57..837b030646c 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -8955,3 +8955,227 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: vprorq $123, -1032(%rdx){1to4}, %ymm21 // CHECK: encoding: [0x62,0xf1,0xd5,0x30,0x72,0x82,0xf8,0xfb,0xff,0xff,0x7b] vprorq $123, -1032(%rdx){1to4}, %ymm21 + +// CHECK: vpmuludq %xmm18, %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x21,0xcd,0x00,0xf4,0xda] + vpmuludq %xmm18, %xmm22, %xmm27 + +// CHECK: vpmuludq %xmm18, %xmm22, %xmm27 {%k5} +// CHECK: encoding: [0x62,0x21,0xcd,0x05,0xf4,0xda] + vpmuludq %xmm18, %xmm22, %xmm27 {%k5} + +// CHECK: vpmuludq %xmm18, %xmm22, %xmm27 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0xcd,0x85,0xf4,0xda] + vpmuludq %xmm18, %xmm22, %xmm27 {%k5} {z} + +// CHECK: vpmuludq (%rcx), %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x19] + vpmuludq (%rcx), %xmm22, %xmm27 + +// CHECK: vpmuludq 291(%rax,%r14,8), %xmm22, 
%xmm27 +// CHECK: encoding: [0x62,0x21,0xcd,0x00,0xf4,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmuludq 291(%rax,%r14,8), %xmm22, %xmm27 + +// CHECK: vpmuludq (%rcx){1to2}, %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x19] + vpmuludq (%rcx){1to2}, %xmm22, %xmm27 + +// CHECK: vpmuludq 2032(%rdx), %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x5a,0x7f] + vpmuludq 2032(%rdx), %xmm22, %xmm27 + +// CHECK: vpmuludq 2048(%rdx), %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x9a,0x00,0x08,0x00,0x00] + vpmuludq 2048(%rdx), %xmm22, %xmm27 + +// CHECK: vpmuludq -2048(%rdx), %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x5a,0x80] + vpmuludq -2048(%rdx), %xmm22, %xmm27 + +// CHECK: vpmuludq -2064(%rdx), %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x9a,0xf0,0xf7,0xff,0xff] + vpmuludq -2064(%rdx), %xmm22, %xmm27 + +// CHECK: vpmuludq 1016(%rdx){1to2}, %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x5a,0x7f] + vpmuludq 1016(%rdx){1to2}, %xmm22, %xmm27 + +// CHECK: vpmuludq 1024(%rdx){1to2}, %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x9a,0x00,0x04,0x00,0x00] + vpmuludq 1024(%rdx){1to2}, %xmm22, %xmm27 + +// CHECK: vpmuludq -1024(%rdx){1to2}, %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x5a,0x80] + vpmuludq -1024(%rdx){1to2}, %xmm22, %xmm27 + +// CHECK: vpmuludq -1032(%rdx){1to2}, %xmm22, %xmm27 +// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x9a,0xf8,0xfb,0xff,0xff] + vpmuludq -1032(%rdx){1to2}, %xmm22, %xmm27 + +// CHECK: vpmuludq %ymm18, %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x21,0xa5,0x20,0xf4,0xf2] + vpmuludq %ymm18, %ymm27, %ymm30 + +// CHECK: vpmuludq %ymm18, %ymm27, %ymm30 {%k7} +// CHECK: encoding: [0x62,0x21,0xa5,0x27,0xf4,0xf2] + vpmuludq %ymm18, %ymm27, %ymm30 {%k7} + +// CHECK: vpmuludq %ymm18, %ymm27, %ymm30 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0xa5,0xa7,0xf4,0xf2] + vpmuludq %ymm18, %ymm27, %ymm30 {%k7} {z} + +// CHECK: vpmuludq 
(%rcx), %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x31] + vpmuludq (%rcx), %ymm27, %ymm30 + +// CHECK: vpmuludq 291(%rax,%r14,8), %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x21,0xa5,0x20,0xf4,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmuludq 291(%rax,%r14,8), %ymm27, %ymm30 + +// CHECK: vpmuludq (%rcx){1to4}, %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x31] + vpmuludq (%rcx){1to4}, %ymm27, %ymm30 + +// CHECK: vpmuludq 4064(%rdx), %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x72,0x7f] + vpmuludq 4064(%rdx), %ymm27, %ymm30 + +// CHECK: vpmuludq 4096(%rdx), %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0xb2,0x00,0x10,0x00,0x00] + vpmuludq 4096(%rdx), %ymm27, %ymm30 + +// CHECK: vpmuludq -4096(%rdx), %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x72,0x80] + vpmuludq -4096(%rdx), %ymm27, %ymm30 + +// CHECK: vpmuludq -4128(%rdx), %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0xb2,0xe0,0xef,0xff,0xff] + vpmuludq -4128(%rdx), %ymm27, %ymm30 + +// CHECK: vpmuludq 1016(%rdx){1to4}, %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x72,0x7f] + vpmuludq 1016(%rdx){1to4}, %ymm27, %ymm30 + +// CHECK: vpmuludq 1024(%rdx){1to4}, %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0xb2,0x00,0x04,0x00,0x00] + vpmuludq 1024(%rdx){1to4}, %ymm27, %ymm30 + +// CHECK: vpmuludq -1024(%rdx){1to4}, %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x72,0x80] + vpmuludq -1024(%rdx){1to4}, %ymm27, %ymm30 + +// CHECK: vpmuludq -1032(%rdx){1to4}, %ymm27, %ymm30 +// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0xb2,0xf8,0xfb,0xff,0xff] + vpmuludq -1032(%rdx){1to4}, %ymm27, %ymm30 + +// CHECK: vpmuldq %xmm26, %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x02,0xbd,0x00,0x28,0xca] + vpmuldq %xmm26, %xmm24, %xmm25 + +// CHECK: vpmuldq %xmm26, %xmm24, %xmm25 {%k3} +// CHECK: encoding: [0x62,0x02,0xbd,0x03,0x28,0xca] + vpmuldq %xmm26, %xmm24, %xmm25 {%k3} + +// CHECK: vpmuldq 
%xmm26, %xmm24, %xmm25 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0xbd,0x83,0x28,0xca] + vpmuldq %xmm26, %xmm24, %xmm25 {%k3} {z} + +// CHECK: vpmuldq (%rcx), %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x09] + vpmuldq (%rcx), %xmm24, %xmm25 + +// CHECK: vpmuldq 291(%rax,%r14,8), %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x22,0xbd,0x00,0x28,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmuldq 291(%rax,%r14,8), %xmm24, %xmm25 + +// CHECK: vpmuldq (%rcx){1to2}, %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x09] + vpmuldq (%rcx){1to2}, %xmm24, %xmm25 + +// CHECK: vpmuldq 2032(%rdx), %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x4a,0x7f] + vpmuldq 2032(%rdx), %xmm24, %xmm25 + +// CHECK: vpmuldq 2048(%rdx), %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x8a,0x00,0x08,0x00,0x00] + vpmuldq 2048(%rdx), %xmm24, %xmm25 + +// CHECK: vpmuldq -2048(%rdx), %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x4a,0x80] + vpmuldq -2048(%rdx), %xmm24, %xmm25 + +// CHECK: vpmuldq -2064(%rdx), %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x8a,0xf0,0xf7,0xff,0xff] + vpmuldq -2064(%rdx), %xmm24, %xmm25 + +// CHECK: vpmuldq 1016(%rdx){1to2}, %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x4a,0x7f] + vpmuldq 1016(%rdx){1to2}, %xmm24, %xmm25 + +// CHECK: vpmuldq 1024(%rdx){1to2}, %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x8a,0x00,0x04,0x00,0x00] + vpmuldq 1024(%rdx){1to2}, %xmm24, %xmm25 + +// CHECK: vpmuldq -1024(%rdx){1to2}, %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x4a,0x80] + vpmuldq -1024(%rdx){1to2}, %xmm24, %xmm25 + +// CHECK: vpmuldq -1032(%rdx){1to2}, %xmm24, %xmm25 +// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x8a,0xf8,0xfb,0xff,0xff] + vpmuldq -1032(%rdx){1to2}, %xmm24, %xmm25 + +// CHECK: vpmuldq %ymm26, %ymm18, %ymm19 +// CHECK: encoding: [0x62,0x82,0xed,0x20,0x28,0xda] + vpmuldq %ymm26, %ymm18, %ymm19 + +// CHECK: vpmuldq %ymm26, 
%ymm18, %ymm19 {%k6} +// CHECK: encoding: [0x62,0x82,0xed,0x26,0x28,0xda] + vpmuldq %ymm26, %ymm18, %ymm19 {%k6} + +// CHECK: vpmuldq %ymm26, %ymm18, %ymm19 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0xed,0xa6,0x28,0xda] + vpmuldq %ymm26, %ymm18, %ymm19 {%k6} {z} + +// CHECK: vpmuldq (%rcx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x19] + vpmuldq (%rcx), %ymm18, %ymm19 + +// CHECK: vpmuldq 291(%rax,%r14,8), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xa2,0xed,0x20,0x28,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmuldq 291(%rax,%r14,8), %ymm18, %ymm19 + +// CHECK: vpmuldq (%rcx){1to4}, %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x19] + vpmuldq (%rcx){1to4}, %ymm18, %ymm19 + +// CHECK: vpmuldq 4064(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x5a,0x7f] + vpmuldq 4064(%rdx), %ymm18, %ymm19 + +// CHECK: vpmuldq 4096(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x9a,0x00,0x10,0x00,0x00] + vpmuldq 4096(%rdx), %ymm18, %ymm19 + +// CHECK: vpmuldq -4096(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x5a,0x80] + vpmuldq -4096(%rdx), %ymm18, %ymm19 + +// CHECK: vpmuldq -4128(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x9a,0xe0,0xef,0xff,0xff] + vpmuldq -4128(%rdx), %ymm18, %ymm19 + +// CHECK: vpmuldq 1016(%rdx){1to4}, %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x5a,0x7f] + vpmuldq 1016(%rdx){1to4}, %ymm18, %ymm19 + +// CHECK: vpmuldq 1024(%rdx){1to4}, %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0x00,0x04,0x00,0x00] + vpmuldq 1024(%rdx){1to4}, %ymm18, %ymm19 + +// CHECK: vpmuldq -1024(%rdx){1to4}, %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x5a,0x80] + vpmuldq -1024(%rdx){1to4}, %ymm18, %ymm19 + +// CHECK: vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0xf8,0xfb,0xff,0xff] + vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19 -- 2.34.1