From: Igor Breger Date: Mon, 29 Jun 2015 09:10:00 +0000 (+0000) Subject: AVX-512: Implemented missing encoding and intrinsics for FMA instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=048a1eb977491bf8e0834ed8bba091733de85f11;p=oota-llvm.git AVX-512: Implemented missing encoding and intrinsics for FMA instructions Added tests for DAG lowering, encoding and intrinsics Differential Revision: http://reviews.llvm.org/D10796 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240926 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 5c8b089b4dd..15f5078ccef 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2450,36 +2450,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_mask_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmadd_pd_128 : 
GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], - [IntrNoMem]>; + def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -2504,36 +2475,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_mask_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubps128_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], - [IntrNoMem]>; def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -2558,36 +2499,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_mask_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], - [IntrNoMem]>; def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -2612,36 +2523,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_mask_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], - [IntrNoMem]>; def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -2660,36 +2541,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_mask_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], - [IntrNoMem]>; def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -2708,36 +2559,403 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_mask_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsubadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsubadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsubadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], - [IntrNoMem]>; - def int_x86_fma_mask_vfmsubadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], - [IntrNoMem]>; + + def int_x86_avx512_mask_vfmadd_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmadd_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask3">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmadd_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmaddpd128_maskz">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmadd_pd_256 : + 
GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmadd_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask3">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmadd_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmaddpd256_maskz">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmadd_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmadd_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask3">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmadd_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmaddpd512_maskz">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vfmadd_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmadd_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmaddps128_mask3">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmadd_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmaddps128_maskz">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmadd_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + 
def int_x86_avx512_mask3_vfmadd_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmaddps256_mask3">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmadd_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmaddps256_maskz">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmadd_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmadd_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmaddps512_mask3">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmadd_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmaddps512_maskz">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vfmaddsub_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmaddsub_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask3">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmaddsub_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_maskz">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmaddsub_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmaddsub_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask3">, + 
Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmaddsub_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_maskz">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmaddsub_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmaddsub_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask3">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmaddsub_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_maskz">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vfmaddsub_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmaddsub_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask3">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmaddsub_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps128_maskz">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmaddsub_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmaddsub_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask3">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; 
+ + def int_x86_avx512_maskz_vfmaddsub_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps256_maskz">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfmaddsub_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmaddsub_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask3">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_vfmaddsub_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps512_maskz">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsub_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsub_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask3">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsub_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask3">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsub_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmsubps128_mask3">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsub_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmsubps256_mask3">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsub_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmsubps512_mask3">, + 
Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsubadd_pd_128 : + GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask3">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsubadd_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask3">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsubadd_pd_512 : + GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask3">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsubadd_ps_128 : + GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask3">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsubadd_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask3">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfmsubadd_ps_512 : + GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask3">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmadd_pd_128 : + GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmadd_pd_256 : + GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmadd_pd_512 : + GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], 
[IntrNoMem]>; + + def int_x86_avx512_mask_vfnmadd_ps_128 : + GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmadd_ps_256 : + GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmadd_ps_512 : + GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmsub_pd_128 : + GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfnmsub_pd_128 : + GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask3">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmsub_pd_256 : + GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfnmsub_pd_256 : + GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask3">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmsub_pd_512 : + GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfnmsub_pd_512 : + GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask3">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmsub_ps_128 : + GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">, + Intrinsic<[llvm_v4f32_ty], + 
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfnmsub_ps_128 : + GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask3">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmsub_ps_256 : + GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask3_vfnmsub_ps_256 : + GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask3">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vfnmsub_ps_512 : + GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask3_vfnmsub_ps_512 : + GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask3">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty], [IntrNoMem]>; + } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d36bcd98e1d..76f3fd63ad3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15126,7 +15126,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask, PassThru, Subtarget, DAG); } case VPERM_3OP_MASKZ: - case VPERM_3OP_MASK: + case VPERM_3OP_MASK: + case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { SDValue Src1 = Op.getOperand(1); @@ -15134,9 +15135,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); EVT VT = Op.getValueType(); - SDValue PassThru = - (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == 
FMA_OP_MASKZ) ? - getZeroVector(VT, Subtarget, DAG, dl) : Src1; + SDValue PassThru = SDValue(); + + // set PassThru element + if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + else if (IntrData->Type == FMA_OP_MASK3) + PassThru = Src3; + else + PassThru = Src1; + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 79f0ceb1315..db1f432b0e9 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4005,147 +4005,203 @@ let Predicates = [HasAVX512] in { // let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3p_rm opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode = null_frag> { +multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { defm r: AVX512_maskable_3src, AVX512FMA3Base; - let mayLoad = 1 in - defm m: AVX512_maskable_3src, AVX512FMA3Base; - defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; - } -} // Constraints = "$src1 = $dst" + } +} -let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. 
-multiclass avx512_fma3_round_rrb opc, string OpcodeStr, - X86VectorVTInfo _, - SDPatternOperator OpNode> { - defm rb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC; - } +} } // Constraints = "$src1 = $dst" -multiclass avx512_fma3_round_forms opc213, string OpcodeStr, - X86VectorVTInfo VTI, SDPatternOperator OpNode> { - defm v213r : avx512_fma3_round_rrb, EVEX_CD8; +multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_213_rm, + avx512_fma3_213_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_213_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_213_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; + } } -multiclass avx512_fma3p_forms opc213, bits<8> opc231, - string OpcodeStr, X86VectorVTInfo VTI, - SDPatternOperator OpNode> { - defm v213r : avx512_fma3p_rm, EVEX_CD8; - defm v231r : avx512_fma3p_rm, EVEX_CD8; +multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_213_common; + defm PD : avx512_fma3p_213_common, VEX_W; } -multiclass avx512_fma3p opc213, bits<8> opc231, - string OpcodeStr, - SDPatternOperator OpNode, - SDPatternOperator OpNodeRnd> { -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_forms, - avx512_fma3_round_forms, EVEX_V512; - defm NAME##PSZ256 : avx512_fma3p_forms, EVEX_V256; - defm NAME##PSZ128 : avx512_fma3p_forms, EVEX_V128; +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; +defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, 
"vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; + + +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_forms, - avx512_fma3_round_forms, EVEX_V512, VEX_W; - defm NAME##PDZ256 : avx512_fma3p_forms, - EVEX_V256, VEX_W; - defm NAME##PDZ128 : avx512_fma3p_forms, - EVEX_V128, VEX_W; +} + +multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" + +multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_231_rm, + avx512_fma3_231_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_231_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_231_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>; -defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>; -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>; -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>; -defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; +multiclass avx512_fma3p_231_f opc, string 
OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_231_common; + defm PD : avx512_fma3p_231_common, VEX_W; +} + +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; +defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; +multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; + } } -} // Constraints = "$src1 = $dst" -multiclass avx512_fma3p_m132_f opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_m132, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ256 : avx512_fma3p_m132, EVEX_V256, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ128 : avx512_fma3p_m132, EVEX_V128, - EVEX_CD8<32, CD8VF>; +multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_132_rm, + avx512_fma3_132_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } -let ExeDomain = SSEPackedDouble in 
{ - defm NAME##PDZ : avx512_fma3p_m132, EVEX_V512, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ256 : avx512_fma3p_m132, EVEX_V256, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ128 : avx512_fma3p_m132, EVEX_V128, - VEX_W, EVEX_CD8<32, CD8VF>; + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_132_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_132_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>; -defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>; -defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>; -defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; -defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; -defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; +multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_132_common; + defm PD : avx512_fma3p_132_common, VEX_W; +} + +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; +defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; // Scalar FMA let Constraints = "$src1 = $dst" in { diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index d60dfb5efac..c9823a24a5e 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -22,7 +22,7 @@ enum IntrinsicType { INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK, 
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, - INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, VPERM_3OP_MASK, + INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND @@ -268,6 +268,52 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), + + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD, + X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD, + X86ISD::FMADD_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB, + X86ISD::FMSUB_RND), + 
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB, + X86ISD::FMSUB_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD, + X86ISD::FMSUBADD_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD, + X86ISD::FMSUBADD_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD, @@ -714,6 +760,44 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), + + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, 
X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD, + X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD, + X86ISD::FMADD_RND), + + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD, + X86ISD::FNMADD_RND), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD, + X86ISD::FNMADD_RND), + + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), + 
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK, @@ -802,6 +886,25 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD, + X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD, + X86ISD::FMADD_RND), + + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ, X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ, @@ -855,54 +958,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), 
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD, - X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD, - X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB, - X86ISD::FMADDSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB, - X86ISD::FMADDSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB, - X86ISD::FMSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB, - X86ISD::FMSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD, - X86ISD::FMSUBADD_RND), - 
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD, - X86ISD::FMSUBADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD, - X86ISD::FNMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD, - X86ISD::FNMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB, - X86ISD::FNMSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB, - X86ISD::FNMSUB_RND), X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0), diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll index c1169f1acb8..c30fc909f09 100644 --- a/test/CodeGen/X86/avx512-fma-intrinsics.ll +++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll @@ -1,422 +1,675 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s -declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) -declare <8 x double> 
@llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) -declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) - -define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { - ; CHECK-LABEL: test_x86_vfmsubpd_z - ; CHECK: vfmsub213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind - ret <8 x double> %res -} -declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone - -define <8 x double> @test_mask_vfmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsub_pd - ; CHECK: vfmsub213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind - ret <8 x double> %res -} +declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) +declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_vfnmadd_ps_z ; CHECK: vfnmadd213ps %zmm - %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res } -declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone +declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x 
float> %a1, <16 x float> %a2, i16 %mask) { ; CHECK-LABEL: test_mask_vfnmadd_ps ; CHECK: vfnmadd213ps %zmm - %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind ret <16 x float> %res } define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_vfnmadd_pd_z ; CHECK: vfnmadd213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res } -declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone +declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmadd_pd ; CHECK: vfnmadd213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res } define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_vfnmsubps_z ; CHECK: vfnmsub213ps %zmm - %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, 
<16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res } -declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone +declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { ; CHECK-LABEL: test_mask_vfnmsub_ps ; CHECK: vfnmsub213ps %zmm - %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind ret <16 x float> %res } define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_vfnmsubpd_z ; CHECK: vfnmsub213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res } -declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone +declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmsub_pd ; CHECK: vfnmsub213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret 
<8 x double> %res } define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_vfmaddsubps_z ; CHECK: vfmaddsub213ps %zmm - %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res } define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) { ; CHECK-LABEL: test_mask_fmaddsub_ps: ; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4) + %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4) ret <16 x float> %res } -declare <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone +declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_vfmaddsubpd_z ; CHECK: vfmaddsub213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res } -declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone +declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 
nounwind readnone define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmaddsub_pd ; CHECK: vfmaddsub213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind - ret <8 x double> %res -} - -define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { - ; CHECK-LABEL: test_x86_vfmsubaddps_z - ; CHECK: vfmsubadd213ps %zmm - %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind - ret <16 x float> %res -} -declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone - -define <16 x float> @test_mask_vfmsubadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { - ; CHECK-LABEL: test_mask_vfmsubadd_ps - ; CHECK: vfmsubadd213ps %zmm - %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind - ret <16 x float> %res -} - -define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { - ; CHECK-LABEL: test_x86_vfmsubaddpd_z - ; CHECK: vfmsubadd213pd %zmm - %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res } -declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone -define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsubadd_pd - ; CHECK: vfmsubadd213pd %zmm - %res = call 
<8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind - ret <8 x double> %res +define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x 
double>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw 
%edi, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsubadd213pd 
{rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 } define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x 
float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2] - %res = call <16 x 
float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind ret <16 x float> %res } define 
<16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind ret <16 x float> %res } define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne - ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res } -define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn - ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) 
nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp - ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz - ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current - ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne - ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { - ; CHECK-LABEL: 
test_mask_round_vfmsub512_ps_rrbz_rtn - ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp - ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz - ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind - ret <16 x float> %res -} - -define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { - ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current - ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2] - %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind - ret <16 x float> %res +declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: 
vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 } define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind ret <8 x double> %res 
} define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## 
encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 
-1, i32 2) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res } +define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x double> 
@llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: 
test_int_x86_avx512_mask_vfmadd_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmadd213ps {rn-sae}, 
%zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 
%mask, i32 2) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind ret <8 x double> %res } define <8 x double> 
@test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2] - %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind ret <8 x double> %res } define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2] - %res = call <8 x double> 
@llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res } + +define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x 
double> %res2 +} + +define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; 
CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} diff --git a/test/CodeGen/X86/avx512-fma.ll b/test/CodeGen/X86/avx512-fma.ll index c5603d4c72e..c4a44e60c5f 100644 --- a/test/CodeGen/X86/avx512-fma.ll +++ b/test/CodeGen/X86/avx512-fma.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX ; CHECK-LABEL: test_x86_fmadd_ps_z ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 @@ -64,20 +65,94 @@ define double @test_x86_fmsub_sd_z(double %a0, double %a1, double %a2) { ret double %res } -;CHECK-LABEL: test132_br -;CHECK: vfmadd132ps LCP{{.*}}(%rip){1to16} -;CHECK: ret -define <16 x float> @test132_br(<16 
x float> %a1, <16 x float> %a2) nounwind { +define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { +; CHECK-LABEL: test231_br: +; CHECK: ## BB#0: +; CHECK-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1 +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq %b1 = fmul <16 x float> %a1, %b2 = fadd <16 x float> %b1, %a2 ret <16 x float> %b2 } -;CHECK-LABEL: test213_br -;CHECK: vfmadd213ps LCP{{.*}}(%rip){1to16} -;CHECK: ret define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { +; CHECK-LABEL: test213_br: +; CHECK: ## BB#0: +; CHECK-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0 +; CHECK-NEXT: retq %b1 = fmul <16 x float> %a1, %a2 %b2 = fadd <16 x float> %b1, ret <16 x float> %b2 } + +;mask (a*c+b , a) +define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { +; CHECK-LABEL: test_x86_fmadd132_ps: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2 +; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1 +; CHECK-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq +; +; SKX-LABEL: test_x86_fmadd132_ps: +; SKX: ## BB#0: +; SKX-NEXT: vpmovb2m %xmm2, %k1 +; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq + %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 + %x = fmul <16 x float> %a0, %a2 + %y = fadd <16 x float> %x, %a1 + %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0 + ret <16 x float> %res +} + +;mask (a*c+b , b) +define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { +; CHECK-LABEL: test_x86_fmadd231_ps: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2 +; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1 +; CHECK-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq +; +; SKX-LABEL: 
test_x86_fmadd231_ps: +; SKX: ## BB#0: +; SKX-NEXT: vpmovb2m %xmm2, %k1 +; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq + %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 + %x = fmul <16 x float> %a0, %a2 + %y = fadd <16 x float> %x, %a1 + %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 + ret <16 x float> %res +} + +;mask (b*a+c , b) +define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { +; CHECK-LABEL: test_x86_fmadd213_ps: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2 +; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1 +; CHECK-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq +; +; SKX-LABEL: test_x86_fmadd213_ps: +; SKX: ## BB#0: +; SKX-NEXT: vpmovb2m %xmm2, %k1 +; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq + %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 + %x = fmul <16 x float> %a1, %a0 + %y = fadd <16 x float> %x, %a2 + %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 + ret <16 x float> %res +} + diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 8a662048a64..0119d3945f4 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -612,248 +612,925 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone -declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone +declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x 
float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmadd256_ps ; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2] - %res = call <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind + %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind ret <8 x float> %res } -declare <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone +declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmadd128_ps ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind ret <4 x float> %res } -declare <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) +declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) { ; CHECK-LABEL: test_mask_fmadd256_pd: ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) + %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) ret <4 x double> %res } -declare <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, 
i8) +declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: test_mask_fmadd128_pd: ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) + %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) ret <2 x double> %res } -declare <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone - -define <8 x float> @test_mask_vfmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsub256_ps - ; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xaa,0xc2] - %res = call <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind - ret <8 x float> %res -} - -declare <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone - -define <4 x float> @test_mask_vfmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsub128_ps - ; CHECK: vfmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaa,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind - ret <4 x float> %res -} - -declare <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone - -define <4 x double> @test_mask_vfmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsub256_pd - ; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: 
[0x62,0xf2,0xf5,0x29,0xaa,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind - ret <4 x double> %res -} - -declare <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone - -define <2 x double> @test_mask_vfmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsub128_pd - ; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaa,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind - ret <2 x double> %res -} - -declare <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone +define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; 
CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +define <4 x 
float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; 
CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> 
@llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + + +declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + + +declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 
x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256: +; CHECK: ## 
BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmadd256_ps ; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2] - %res = call <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind + %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind ret <8 x float> %res } -declare <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone +declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmadd128_ps ; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind ret <4 x float> %res } -declare <4 x 
double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone +declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmadd256_pd ; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind + %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind ret <4 x double> %res } -declare <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone +declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmadd128_pd ; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind + %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind ret <2 x double> %res } -declare <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone +declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmsub256_ps ; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: 
[0x62,0xf2,0x75,0x29,0xae,0xc2] - %res = call <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind + %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind ret <8 x float> %res } -declare <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone +declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmsub128_ps ; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind ret <4 x float> %res } -declare <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone +declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmsub256_pd ; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind + %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind ret <4 x double> %res } -declare <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone +declare <2 x 
double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfnmsub128_pd ; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind + %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind ret <2 x double> %res } -declare <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone + +define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfnmsub231pd %xmm1, 
%xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> 
@llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: 
test_int_x86_avx512_mask_vfnmsub_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 
x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; 
CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) { ; CHECK-LABEL: test_mask_fmaddsub256_ps: ; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2] - %res = call <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) + %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) ret <8 x float> %res } -declare <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone +declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: test_mask_fmaddsub128_ps: ; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) + %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) 
ret <4 x float> %res } -declare <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone +declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmaddsub256_pd ; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind + %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind ret <4 x double> %res } -declare <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone +declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmaddsub128_pd ; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind + %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind ret <2 x double> %res } -declare <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone - -define <8 x float> @test_mask_vfmsubadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsubadd256_ps - ; CHECK: vfmsubadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa7,0xc2] - %res = call <8 
x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind - ret <8 x float> %res +define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x 
double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps 
%zmm2, %zmm3 +; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x 
float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> 
%res2 +} + +define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256: +; CHECK: ## BB#0: 
+; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2=fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: 
vfmsubadd213pd %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2=fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2=fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm2, %zmm3 +; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1} +; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x 
float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2=fadd <8 x float> %res, %res1 + ret <8 x float> %res2 } -declare <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone - -define <4 x float> @test_mask_vfmsubadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsubadd128_ps - ; CHECK: vfmsubadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa7,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind - ret <4 x float> %res -} - -declare <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone - -define <4 x double> @test_mask_vfmsubadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsubadd256_pd - ; CHECK: vfmsubadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa7,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind - ret <4 x double> %res -} -declare <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone - -define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsubadd128_pd - ; CHECK: vfmsubadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind - ret <2 x double> %res -} - -define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsubadd128rm_pd - ; CHECK: 
vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07] - %a2 = load <2 x double>, <2 x double>* %ptr_a2 - %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind - ret <2 x double> %res -} -declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone -define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) { - ; CHECK-LABEL: test_mask_vfmsubaddrm_pd - ; CHECK: vfmsubadd213pd (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07] - %a2 = load <8 x double>, <8 x double>* %ptr_a2, align 8 - %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind - ret <8 x double> %res -} define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmadd128_ps_r ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind ret <4 x float> %res } define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; CHECK-LABEL: test_mask_vfmadd128_ps_rz ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind ret <4 x float> %res } @@ -861,7 +1538,7 @@ define <4 x float> 
@test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] %a2 = load <4 x float>, <4 x float>* %ptr_a2 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind ret <4 x float> %res } @@ -869,7 +1546,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind ret <4 x float> %res } @@ -877,7 +1554,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07] %a2 = load <4 x float>, <4 x float>* %ptr_a2 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind ret <4 x float> %res } @@ -885,7 +1562,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07] %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x 
float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind ret <4 x float> %res } @@ -897,7 +1574,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind ret <4 x float> %res } @@ -909,7 +1586,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind ret <4 x float> %res } @@ -921,7 +1598,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) 
nounwind ret <4 x float> %res } @@ -933,21 +1610,21 @@ define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 - %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind ret <4 x float> %res } define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmadd128_pd_r ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind + %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind ret <2 x double> %res } define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; CHECK-LABEL: test_mask_vfmadd128_pd_rz ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] - %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind + %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind ret <2 x double> %res } @@ -955,7 +1632,7 @@ define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> % ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07] %a2 = load <2 x double>, <2 x double>* %ptr_a2 - %res = call <2 x 
double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind + %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind ret <2 x double> %res } @@ -963,21 +1640,21 @@ define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07] %a2 = load <2 x double>, <2 x double>* %ptr_a2 - %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind + %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind ret <2 x double> %res } define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_vfmadd256_pd_r ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind + %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind ret <4 x double> %res } define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; CHECK-LABEL: test_mask_vfmadd256_pd_rz ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] - %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind + %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind ret <4 x double> %res } @@ -985,7 +1662,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> % ; 
CHECK-LABEL: test_mask_vfmadd256_pd_rmk ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07] %a2 = load <4 x double>, <4 x double>* %ptr_a2 - %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind + %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind ret <4 x double> %res } @@ -993,7 +1670,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07] %a2 = load <4 x double>, <4 x double>* %ptr_a2 - %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind + %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind ret <4 x double> %res } define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 1ab484d2a80..27fe9f94c7b 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -9290,6 +9290,2671 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff] vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14 +// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0x92,0x5d,0x40,0x98,0xc9] + vfmadd132ps %zmm25, %zmm20, %zmm1 + +// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} +// CHECK: encoding: [0x62,0x92,0x5d,0x41,0x98,0xc9] + vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} + +// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} {z} +// CHECK: encoding: [0x62,0x92,0x5d,0xc1,0x98,0xc9] + vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} {z} + +// CHECK: vfmadd132ps {rn-sae}, %zmm25, %zmm20, %zmm1 +// CHECK: encoding: 
[0x62,0x92,0x5d,0x10,0x98,0xc9] + vfmadd132ps {rn-sae}, %zmm25, %zmm20, %zmm1 + +// CHECK: vfmadd132ps {ru-sae}, %zmm25, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0x92,0x5d,0x50,0x98,0xc9] + vfmadd132ps {ru-sae}, %zmm25, %zmm20, %zmm1 + +// CHECK: vfmadd132ps {rd-sae}, %zmm25, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0x92,0x5d,0x30,0x98,0xc9] + vfmadd132ps {rd-sae}, %zmm25, %zmm20, %zmm1 + +// CHECK: vfmadd132ps {rz-sae}, %zmm25, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0x92,0x5d,0x70,0x98,0xc9] + vfmadd132ps {rz-sae}, %zmm25, %zmm20, %zmm1 + +// CHECK: vfmadd132ps (%rcx), %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x09] + vfmadd132ps (%rcx), %zmm20, %zmm1 + +// CHECK: vfmadd132ps 291(%rax,%r14,8), %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x5d,0x40,0x98,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmadd132ps 291(%rax,%r14,8), %zmm20, %zmm1 + +// CHECK: vfmadd132ps (%rcx){1to16}, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x09] + vfmadd132ps (%rcx){1to16}, %zmm20, %zmm1 + +// CHECK: vfmadd132ps 8128(%rdx), %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x4a,0x7f] + vfmadd132ps 8128(%rdx), %zmm20, %zmm1 + +// CHECK: vfmadd132ps 8192(%rdx), %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x8a,0x00,0x20,0x00,0x00] + vfmadd132ps 8192(%rdx), %zmm20, %zmm1 + +// CHECK: vfmadd132ps -8192(%rdx), %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x4a,0x80] + vfmadd132ps -8192(%rdx), %zmm20, %zmm1 + +// CHECK: vfmadd132ps -8256(%rdx), %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x8a,0xc0,0xdf,0xff,0xff] + vfmadd132ps -8256(%rdx), %zmm20, %zmm1 + +// CHECK: vfmadd132ps 508(%rdx){1to16}, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x4a,0x7f] + vfmadd132ps 508(%rdx){1to16}, %zmm20, %zmm1 + +// CHECK: vfmadd132ps 512(%rdx){1to16}, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x8a,0x00,0x02,0x00,0x00] + vfmadd132ps 512(%rdx){1to16}, %zmm20, %zmm1 + +// CHECK: 
vfmadd132ps -512(%rdx){1to16}, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x4a,0x80] + vfmadd132ps -512(%rdx){1to16}, %zmm20, %zmm1 + +// CHECK: vfmadd132ps -516(%rdx){1to16}, %zmm20, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x8a,0xfc,0xfd,0xff,0xff] + vfmadd132ps -516(%rdx){1to16}, %zmm20, %zmm1 + +// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x22,0xfd,0x40,0x98,0xd5] + vfmadd132pd %zmm21, %zmm16, %zmm26 + +// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} +// CHECK: encoding: [0x62,0x22,0xfd,0x45,0x98,0xd5] + vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} + +// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0xfd,0xc5,0x98,0xd5] + vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} {z} + +// CHECK: vfmadd132pd {rn-sae}, %zmm21, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x22,0xfd,0x10,0x98,0xd5] + vfmadd132pd {rn-sae}, %zmm21, %zmm16, %zmm26 + +// CHECK: vfmadd132pd {ru-sae}, %zmm21, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x22,0xfd,0x50,0x98,0xd5] + vfmadd132pd {ru-sae}, %zmm21, %zmm16, %zmm26 + +// CHECK: vfmadd132pd {rd-sae}, %zmm21, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x22,0xfd,0x30,0x98,0xd5] + vfmadd132pd {rd-sae}, %zmm21, %zmm16, %zmm26 + +// CHECK: vfmadd132pd {rz-sae}, %zmm21, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x22,0xfd,0x70,0x98,0xd5] + vfmadd132pd {rz-sae}, %zmm21, %zmm16, %zmm26 + +// CHECK: vfmadd132pd (%rcx), %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x11] + vfmadd132pd (%rcx), %zmm16, %zmm26 + +// CHECK: vfmadd132pd 291(%rax,%r14,8), %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x22,0xfd,0x40,0x98,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmadd132pd 291(%rax,%r14,8), %zmm16, %zmm26 + +// CHECK: vfmadd132pd (%rcx){1to8}, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x11] + vfmadd132pd (%rcx){1to8}, %zmm16, %zmm26 + +// CHECK: vfmadd132pd 8128(%rdx), %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x52,0x7f] + 
vfmadd132pd 8128(%rdx), %zmm16, %zmm26 + +// CHECK: vfmadd132pd 8192(%rdx), %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x92,0x00,0x20,0x00,0x00] + vfmadd132pd 8192(%rdx), %zmm16, %zmm26 + +// CHECK: vfmadd132pd -8192(%rdx), %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x52,0x80] + vfmadd132pd -8192(%rdx), %zmm16, %zmm26 + +// CHECK: vfmadd132pd -8256(%rdx), %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x92,0xc0,0xdf,0xff,0xff] + vfmadd132pd -8256(%rdx), %zmm16, %zmm26 + +// CHECK: vfmadd132pd 1016(%rdx){1to8}, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x52,0x7f] + vfmadd132pd 1016(%rdx){1to8}, %zmm16, %zmm26 + +// CHECK: vfmadd132pd 1024(%rdx){1to8}, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x92,0x00,0x04,0x00,0x00] + vfmadd132pd 1024(%rdx){1to8}, %zmm16, %zmm26 + +// CHECK: vfmadd132pd -1024(%rdx){1to8}, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x52,0x80] + vfmadd132pd -1024(%rdx){1to8}, %zmm16, %zmm26 + +// CHECK: vfmadd132pd -1032(%rdx){1to8}, %zmm16, %zmm26 +// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x92,0xf8,0xfb,0xff,0xff] + vfmadd132pd -1032(%rdx){1to8}, %zmm16, %zmm26 + +// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xc2,0x65,0x40,0xa8,0xe6] + vfmadd213ps %zmm14, %zmm19, %zmm20 + +// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} +// CHECK: encoding: [0x62,0xc2,0x65,0x44,0xa8,0xe6] + vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} + +// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} {z} +// CHECK: encoding: [0x62,0xc2,0x65,0xc4,0xa8,0xe6] + vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} {z} + +// CHECK: vfmadd213ps {rn-sae}, %zmm14, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xc2,0x65,0x10,0xa8,0xe6] + vfmadd213ps {rn-sae}, %zmm14, %zmm19, %zmm20 + +// CHECK: vfmadd213ps {ru-sae}, %zmm14, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xc2,0x65,0x50,0xa8,0xe6] + vfmadd213ps {ru-sae}, %zmm14, %zmm19, %zmm20 + +// CHECK: vfmadd213ps 
{rd-sae}, %zmm14, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xc2,0x65,0x30,0xa8,0xe6] + vfmadd213ps {rd-sae}, %zmm14, %zmm19, %zmm20 + +// CHECK: vfmadd213ps {rz-sae}, %zmm14, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xc2,0x65,0x70,0xa8,0xe6] + vfmadd213ps {rz-sae}, %zmm14, %zmm19, %zmm20 + +// CHECK: vfmadd213ps (%rcx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0x21] + vfmadd213ps (%rcx), %zmm19, %zmm20 + +// CHECK: vfmadd213ps 291(%rax,%r14,8), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xa2,0x65,0x40,0xa8,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmadd213ps 291(%rax,%r14,8), %zmm19, %zmm20 + +// CHECK: vfmadd213ps (%rcx){1to16}, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0x21] + vfmadd213ps (%rcx){1to16}, %zmm19, %zmm20 + +// CHECK: vfmadd213ps 8128(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0x62,0x7f] + vfmadd213ps 8128(%rdx), %zmm19, %zmm20 + +// CHECK: vfmadd213ps 8192(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0xa2,0x00,0x20,0x00,0x00] + vfmadd213ps 8192(%rdx), %zmm19, %zmm20 + +// CHECK: vfmadd213ps -8192(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0x62,0x80] + vfmadd213ps -8192(%rdx), %zmm19, %zmm20 + +// CHECK: vfmadd213ps -8256(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0xa2,0xc0,0xdf,0xff,0xff] + vfmadd213ps -8256(%rdx), %zmm19, %zmm20 + +// CHECK: vfmadd213ps 508(%rdx){1to16}, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0x62,0x7f] + vfmadd213ps 508(%rdx){1to16}, %zmm19, %zmm20 + +// CHECK: vfmadd213ps 512(%rdx){1to16}, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0xa2,0x00,0x02,0x00,0x00] + vfmadd213ps 512(%rdx){1to16}, %zmm19, %zmm20 + +// CHECK: vfmadd213ps -512(%rdx){1to16}, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0x62,0x80] + vfmadd213ps -512(%rdx){1to16}, %zmm19, %zmm20 + +// CHECK: vfmadd213ps -516(%rdx){1to16}, %zmm19, %zmm20 +// CHECK: encoding: 
[0x62,0xe2,0x65,0x50,0xa8,0xa2,0xfc,0xfd,0xff,0xff] + vfmadd213ps -516(%rdx){1to16}, %zmm19, %zmm20 + +// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0x82,0xfd,0x40,0xa8,0xd1] + vfmadd213pd %zmm25, %zmm16, %zmm18 + +// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} +// CHECK: encoding: [0x62,0x82,0xfd,0x43,0xa8,0xd1] + vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} + +// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xc3,0xa8,0xd1] + vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} {z} + +// CHECK: vfmadd213pd {rn-sae}, %zmm25, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0x82,0xfd,0x10,0xa8,0xd1] + vfmadd213pd {rn-sae}, %zmm25, %zmm16, %zmm18 + +// CHECK: vfmadd213pd {ru-sae}, %zmm25, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0x82,0xfd,0x50,0xa8,0xd1] + vfmadd213pd {ru-sae}, %zmm25, %zmm16, %zmm18 + +// CHECK: vfmadd213pd {rd-sae}, %zmm25, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0x82,0xfd,0x30,0xa8,0xd1] + vfmadd213pd {rd-sae}, %zmm25, %zmm16, %zmm18 + +// CHECK: vfmadd213pd {rz-sae}, %zmm25, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0x82,0xfd,0x70,0xa8,0xd1] + vfmadd213pd {rz-sae}, %zmm25, %zmm16, %zmm18 + +// CHECK: vfmadd213pd (%rcx), %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x11] + vfmadd213pd (%rcx), %zmm16, %zmm18 + +// CHECK: vfmadd213pd 291(%rax,%r14,8), %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xa2,0xfd,0x40,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmadd213pd 291(%rax,%r14,8), %zmm16, %zmm18 + +// CHECK: vfmadd213pd (%rcx){1to8}, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x11] + vfmadd213pd (%rcx){1to8}, %zmm16, %zmm18 + +// CHECK: vfmadd213pd 8128(%rdx), %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x52,0x7f] + vfmadd213pd 8128(%rdx), %zmm16, %zmm18 + +// CHECK: vfmadd213pd 8192(%rdx), %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x92,0x00,0x20,0x00,0x00] + vfmadd213pd 8192(%rdx), %zmm16, %zmm18 + +// CHECK: vfmadd213pd 
-8192(%rdx), %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x52,0x80] + vfmadd213pd -8192(%rdx), %zmm16, %zmm18 + +// CHECK: vfmadd213pd -8256(%rdx), %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x92,0xc0,0xdf,0xff,0xff] + vfmadd213pd -8256(%rdx), %zmm16, %zmm18 + +// CHECK: vfmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x52,0x7f] + vfmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm18 + +// CHECK: vfmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x92,0x00,0x04,0x00,0x00] + vfmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm18 + +// CHECK: vfmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x52,0x80] + vfmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm18 + +// CHECK: vfmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x92,0xf8,0xfb,0xff,0xff] + vfmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm18 + +// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x02,0x1d,0x40,0xb8,0xd9] + vfmadd231ps %zmm25, %zmm28, %zmm27 + +// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} +// CHECK: encoding: [0x62,0x02,0x1d,0x43,0xb8,0xd9] + vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} + +// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x1d,0xc3,0xb8,0xd9] + vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x02,0x1d,0x10,0xb8,0xd9] + vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 + +// CHECK: vfmadd231ps {ru-sae}, %zmm25, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x02,0x1d,0x50,0xb8,0xd9] + vfmadd231ps {ru-sae}, %zmm25, %zmm28, %zmm27 + +// CHECK: vfmadd231ps {rd-sae}, %zmm25, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x02,0x1d,0x30,0xb8,0xd9] + vfmadd231ps {rd-sae}, %zmm25, %zmm28, %zmm27 + +// CHECK: vfmadd231ps {rz-sae}, %zmm25, %zmm28, %zmm27 +// CHECK: encoding: 
[0x62,0x02,0x1d,0x70,0xb8,0xd9] + vfmadd231ps {rz-sae}, %zmm25, %zmm28, %zmm27 + +// CHECK: vfmadd231ps (%rcx), %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x19] + vfmadd231ps (%rcx), %zmm28, %zmm27 + +// CHECK: vfmadd231ps 291(%rax,%r14,8), %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x22,0x1d,0x40,0xb8,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmadd231ps 291(%rax,%r14,8), %zmm28, %zmm27 + +// CHECK: vfmadd231ps (%rcx){1to16}, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x19] + vfmadd231ps (%rcx){1to16}, %zmm28, %zmm27 + +// CHECK: vfmadd231ps 8128(%rdx), %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x5a,0x7f] + vfmadd231ps 8128(%rdx), %zmm28, %zmm27 + +// CHECK: vfmadd231ps 8192(%rdx), %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x9a,0x00,0x20,0x00,0x00] + vfmadd231ps 8192(%rdx), %zmm28, %zmm27 + +// CHECK: vfmadd231ps -8192(%rdx), %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x5a,0x80] + vfmadd231ps -8192(%rdx), %zmm28, %zmm27 + +// CHECK: vfmadd231ps -8256(%rdx), %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x9a,0xc0,0xdf,0xff,0xff] + vfmadd231ps -8256(%rdx), %zmm28, %zmm27 + +// CHECK: vfmadd231ps 508(%rdx){1to16}, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x5a,0x7f] + vfmadd231ps 508(%rdx){1to16}, %zmm28, %zmm27 + +// CHECK: vfmadd231ps 512(%rdx){1to16}, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x9a,0x00,0x02,0x00,0x00] + vfmadd231ps 512(%rdx){1to16}, %zmm28, %zmm27 + +// CHECK: vfmadd231ps -512(%rdx){1to16}, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x5a,0x80] + vfmadd231ps -512(%rdx){1to16}, %zmm28, %zmm27 + +// CHECK: vfmadd231ps -516(%rdx){1to16}, %zmm28, %zmm27 +// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x9a,0xfc,0xfd,0xff,0xff] + vfmadd231ps -516(%rdx){1to16}, %zmm28, %zmm27 + +// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x42,0xcd,0x48,0xb8,0xf1] + vfmadd231pd %zmm9, 
%zmm6, %zmm30 + +// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} +// CHECK: encoding: [0x62,0x42,0xcd,0x4c,0xb8,0xf1] + vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} + +// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} {z} +// CHECK: encoding: [0x62,0x42,0xcd,0xcc,0xb8,0xf1] + vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} {z} + +// CHECK: vfmadd231pd {rn-sae}, %zmm9, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x42,0xcd,0x18,0xb8,0xf1] + vfmadd231pd {rn-sae}, %zmm9, %zmm6, %zmm30 + +// CHECK: vfmadd231pd {ru-sae}, %zmm9, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x42,0xcd,0x58,0xb8,0xf1] + vfmadd231pd {ru-sae}, %zmm9, %zmm6, %zmm30 + +// CHECK: vfmadd231pd {rd-sae}, %zmm9, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x42,0xcd,0x38,0xb8,0xf1] + vfmadd231pd {rd-sae}, %zmm9, %zmm6, %zmm30 + +// CHECK: vfmadd231pd {rz-sae}, %zmm9, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x42,0xcd,0x78,0xb8,0xf1] + vfmadd231pd {rz-sae}, %zmm9, %zmm6, %zmm30 + +// CHECK: vfmadd231pd (%rcx), %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0x31] + vfmadd231pd (%rcx), %zmm6, %zmm30 + +// CHECK: vfmadd231pd 291(%rax,%r14,8), %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x22,0xcd,0x48,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmadd231pd 291(%rax,%r14,8), %zmm6, %zmm30 + +// CHECK: vfmadd231pd (%rcx){1to8}, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0x31] + vfmadd231pd (%rcx){1to8}, %zmm6, %zmm30 + +// CHECK: vfmadd231pd 8128(%rdx), %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0x72,0x7f] + vfmadd231pd 8128(%rdx), %zmm6, %zmm30 + +// CHECK: vfmadd231pd 8192(%rdx), %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0xb2,0x00,0x20,0x00,0x00] + vfmadd231pd 8192(%rdx), %zmm6, %zmm30 + +// CHECK: vfmadd231pd -8192(%rdx), %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0x72,0x80] + vfmadd231pd -8192(%rdx), %zmm6, %zmm30 + +// CHECK: vfmadd231pd -8256(%rdx), %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0xb2,0xc0,0xdf,0xff,0xff] + vfmadd231pd 
-8256(%rdx), %zmm6, %zmm30 + +// CHECK: vfmadd231pd 1016(%rdx){1to8}, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0x72,0x7f] + vfmadd231pd 1016(%rdx){1to8}, %zmm6, %zmm30 + +// CHECK: vfmadd231pd 1024(%rdx){1to8}, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0xb2,0x00,0x04,0x00,0x00] + vfmadd231pd 1024(%rdx){1to8}, %zmm6, %zmm30 + +// CHECK: vfmadd231pd -1024(%rdx){1to8}, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0x72,0x80] + vfmadd231pd -1024(%rdx){1to8}, %zmm6, %zmm30 + +// CHECK: vfmadd231pd -1032(%rdx){1to8}, %zmm6, %zmm30 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0xb2,0xf8,0xfb,0xff,0xff] + vfmadd231pd -1032(%rdx){1to8}, %zmm6, %zmm30 + +// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x15,0x48,0x9a,0xc8] + vfmsub132ps %zmm16, %zmm13, %zmm1 + +// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} +// CHECK: encoding: [0x62,0xb2,0x15,0x4c,0x9a,0xc8] + vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} + +// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} {z} +// CHECK: encoding: [0x62,0xb2,0x15,0xcc,0x9a,0xc8] + vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} {z} + +// CHECK: vfmsub132ps {rn-sae}, %zmm16, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x15,0x18,0x9a,0xc8] + vfmsub132ps {rn-sae}, %zmm16, %zmm13, %zmm1 + +// CHECK: vfmsub132ps {ru-sae}, %zmm16, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x15,0x58,0x9a,0xc8] + vfmsub132ps {ru-sae}, %zmm16, %zmm13, %zmm1 + +// CHECK: vfmsub132ps {rd-sae}, %zmm16, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x15,0x38,0x9a,0xc8] + vfmsub132ps {rd-sae}, %zmm16, %zmm13, %zmm1 + +// CHECK: vfmsub132ps {rz-sae}, %zmm16, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x15,0x78,0x9a,0xc8] + vfmsub132ps {rz-sae}, %zmm16, %zmm13, %zmm1 + +// CHECK: vfmsub132ps (%rcx), %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x09] + vfmsub132ps (%rcx), %zmm13, %zmm1 + +// CHECK: vfmsub132ps 291(%rax,%r14,8), %zmm13, %zmm1 +// CHECK: encoding: 
[0x62,0xb2,0x15,0x48,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmsub132ps 291(%rax,%r14,8), %zmm13, %zmm1 + +// CHECK: vfmsub132ps (%rcx){1to16}, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x09] + vfmsub132ps (%rcx){1to16}, %zmm13, %zmm1 + +// CHECK: vfmsub132ps 8128(%rdx), %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x4a,0x7f] + vfmsub132ps 8128(%rdx), %zmm13, %zmm1 + +// CHECK: vfmsub132ps 8192(%rdx), %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x8a,0x00,0x20,0x00,0x00] + vfmsub132ps 8192(%rdx), %zmm13, %zmm1 + +// CHECK: vfmsub132ps -8192(%rdx), %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x4a,0x80] + vfmsub132ps -8192(%rdx), %zmm13, %zmm1 + +// CHECK: vfmsub132ps -8256(%rdx), %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x8a,0xc0,0xdf,0xff,0xff] + vfmsub132ps -8256(%rdx), %zmm13, %zmm1 + +// CHECK: vfmsub132ps 508(%rdx){1to16}, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x4a,0x7f] + vfmsub132ps 508(%rdx){1to16}, %zmm13, %zmm1 + +// CHECK: vfmsub132ps 512(%rdx){1to16}, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x8a,0x00,0x02,0x00,0x00] + vfmsub132ps 512(%rdx){1to16}, %zmm13, %zmm1 + +// CHECK: vfmsub132ps -512(%rdx){1to16}, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x4a,0x80] + vfmsub132ps -512(%rdx){1to16}, %zmm13, %zmm1 + +// CHECK: vfmsub132ps -516(%rdx){1to16}, %zmm13, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x8a,0xfc,0xfd,0xff,0xff] + vfmsub132ps -516(%rdx){1to16}, %zmm13, %zmm1 + +// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0x82,0x9d,0x48,0x9a,0xf3] + vfmsub132pd %zmm27, %zmm12, %zmm22 + +// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2} +// CHECK: encoding: [0x62,0x82,0x9d,0x4a,0x9a,0xf3] + vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2} + +// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2} {z} +// CHECK: encoding: [0x62,0x82,0x9d,0xca,0x9a,0xf3] + vfmsub132pd %zmm27, %zmm12, %zmm22 
{%k2} {z} + +// CHECK: vfmsub132pd {rn-sae}, %zmm27, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0x82,0x9d,0x18,0x9a,0xf3] + vfmsub132pd {rn-sae}, %zmm27, %zmm12, %zmm22 + +// CHECK: vfmsub132pd {ru-sae}, %zmm27, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0x82,0x9d,0x58,0x9a,0xf3] + vfmsub132pd {ru-sae}, %zmm27, %zmm12, %zmm22 + +// CHECK: vfmsub132pd {rd-sae}, %zmm27, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0x82,0x9d,0x38,0x9a,0xf3] + vfmsub132pd {rd-sae}, %zmm27, %zmm12, %zmm22 + +// CHECK: vfmsub132pd {rz-sae}, %zmm27, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0x82,0x9d,0x78,0x9a,0xf3] + vfmsub132pd {rz-sae}, %zmm27, %zmm12, %zmm22 + +// CHECK: vfmsub132pd (%rcx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x31] + vfmsub132pd (%rcx), %zmm12, %zmm22 + +// CHECK: vfmsub132pd 291(%rax,%r14,8), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x9d,0x48,0x9a,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsub132pd 291(%rax,%r14,8), %zmm12, %zmm22 + +// CHECK: vfmsub132pd (%rcx){1to8}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x31] + vfmsub132pd (%rcx){1to8}, %zmm12, %zmm22 + +// CHECK: vfmsub132pd 8128(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x72,0x7f] + vfmsub132pd 8128(%rdx), %zmm12, %zmm22 + +// CHECK: vfmsub132pd 8192(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0xb2,0x00,0x20,0x00,0x00] + vfmsub132pd 8192(%rdx), %zmm12, %zmm22 + +// CHECK: vfmsub132pd -8192(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x72,0x80] + vfmsub132pd -8192(%rdx), %zmm12, %zmm22 + +// CHECK: vfmsub132pd -8256(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0xb2,0xc0,0xdf,0xff,0xff] + vfmsub132pd -8256(%rdx), %zmm12, %zmm22 + +// CHECK: vfmsub132pd 1016(%rdx){1to8}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x72,0x7f] + vfmsub132pd 1016(%rdx){1to8}, %zmm12, %zmm22 + +// CHECK: vfmsub132pd 1024(%rdx){1to8}, %zmm12, %zmm22 +// CHECK: encoding: 
[0x62,0xe2,0x9d,0x58,0x9a,0xb2,0x00,0x04,0x00,0x00] + vfmsub132pd 1024(%rdx){1to8}, %zmm12, %zmm22 + +// CHECK: vfmsub132pd -1024(%rdx){1to8}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x72,0x80] + vfmsub132pd -1024(%rdx){1to8}, %zmm12, %zmm22 + +// CHECK: vfmsub132pd -1032(%rdx){1to8}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0xb2,0xf8,0xfb,0xff,0xff] + vfmsub132pd -1032(%rdx){1to8}, %zmm12, %zmm22 + +// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x4d,0x40,0xaa,0xf2] + vfmsub213ps %zmm10, %zmm22, %zmm22 + +// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} +// CHECK: encoding: [0x62,0xc2,0x4d,0x46,0xaa,0xf2] + vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} + +// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} {z} +// CHECK: encoding: [0x62,0xc2,0x4d,0xc6,0xaa,0xf2] + vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} {z} + +// CHECK: vfmsub213ps {rn-sae}, %zmm10, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x4d,0x10,0xaa,0xf2] + vfmsub213ps {rn-sae}, %zmm10, %zmm22, %zmm22 + +// CHECK: vfmsub213ps {ru-sae}, %zmm10, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x4d,0x50,0xaa,0xf2] + vfmsub213ps {ru-sae}, %zmm10, %zmm22, %zmm22 + +// CHECK: vfmsub213ps {rd-sae}, %zmm10, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x4d,0x30,0xaa,0xf2] + vfmsub213ps {rd-sae}, %zmm10, %zmm22, %zmm22 + +// CHECK: vfmsub213ps {rz-sae}, %zmm10, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x4d,0x70,0xaa,0xf2] + vfmsub213ps {rz-sae}, %zmm10, %zmm22, %zmm22 + +// CHECK: vfmsub213ps (%rcx), %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x31] + vfmsub213ps (%rcx), %zmm22, %zmm22 + +// CHECK: vfmsub213ps 291(%rax,%r14,8), %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0xaa,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsub213ps 291(%rax,%r14,8), %zmm22, %zmm22 + +// CHECK: vfmsub213ps (%rcx){1to16}, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x31] + vfmsub213ps (%rcx){1to16}, %zmm22, %zmm22 + 
+// CHECK: vfmsub213ps 8128(%rdx), %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x72,0x7f] + vfmsub213ps 8128(%rdx), %zmm22, %zmm22 + +// CHECK: vfmsub213ps 8192(%rdx), %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0xb2,0x00,0x20,0x00,0x00] + vfmsub213ps 8192(%rdx), %zmm22, %zmm22 + +// CHECK: vfmsub213ps -8192(%rdx), %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x72,0x80] + vfmsub213ps -8192(%rdx), %zmm22, %zmm22 + +// CHECK: vfmsub213ps -8256(%rdx), %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0xb2,0xc0,0xdf,0xff,0xff] + vfmsub213ps -8256(%rdx), %zmm22, %zmm22 + +// CHECK: vfmsub213ps 508(%rdx){1to16}, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x72,0x7f] + vfmsub213ps 508(%rdx){1to16}, %zmm22, %zmm22 + +// CHECK: vfmsub213ps 512(%rdx){1to16}, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0xb2,0x00,0x02,0x00,0x00] + vfmsub213ps 512(%rdx){1to16}, %zmm22, %zmm22 + +// CHECK: vfmsub213ps -512(%rdx){1to16}, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x72,0x80] + vfmsub213ps -512(%rdx){1to16}, %zmm22, %zmm22 + +// CHECK: vfmsub213ps -516(%rdx){1to16}, %zmm22, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0xb2,0xfc,0xfd,0xff,0xff] + vfmsub213ps -516(%rdx){1to16}, %zmm22, %zmm22 + +// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0xec] + vfmsub213pd %zmm4, %zmm10, %zmm5 + +// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} +// CHECK: encoding: [0x62,0xf2,0xad,0x49,0xaa,0xec] + vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} + +// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0xad,0xc9,0xaa,0xec] + vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} {z} + +// CHECK: vfmsub213pd {rn-sae}, %zmm4, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x18,0xaa,0xec] + vfmsub213pd {rn-sae}, %zmm4, %zmm10, %zmm5 + +// CHECK: vfmsub213pd {ru-sae}, %zmm4, %zmm10, %zmm5 +// CHECK: encoding: 
[0x62,0xf2,0xad,0x58,0xaa,0xec] + vfmsub213pd {ru-sae}, %zmm4, %zmm10, %zmm5 + +// CHECK: vfmsub213pd {rd-sae}, %zmm4, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x38,0xaa,0xec] + vfmsub213pd {rd-sae}, %zmm4, %zmm10, %zmm5 + +// CHECK: vfmsub213pd {rz-sae}, %zmm4, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x78,0xaa,0xec] + vfmsub213pd {rz-sae}, %zmm4, %zmm10, %zmm5 + +// CHECK: vfmsub213pd (%rcx), %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0x29] + vfmsub213pd (%rcx), %zmm10, %zmm5 + +// CHECK: vfmsub213pd 291(%rax,%r14,8), %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xb2,0xad,0x48,0xaa,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmsub213pd 291(%rax,%r14,8), %zmm10, %zmm5 + +// CHECK: vfmsub213pd (%rcx){1to8}, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0x29] + vfmsub213pd (%rcx){1to8}, %zmm10, %zmm5 + +// CHECK: vfmsub213pd 8128(%rdx), %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0x6a,0x7f] + vfmsub213pd 8128(%rdx), %zmm10, %zmm5 + +// CHECK: vfmsub213pd 8192(%rdx), %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0xaa,0x00,0x20,0x00,0x00] + vfmsub213pd 8192(%rdx), %zmm10, %zmm5 + +// CHECK: vfmsub213pd -8192(%rdx), %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0x6a,0x80] + vfmsub213pd -8192(%rdx), %zmm10, %zmm5 + +// CHECK: vfmsub213pd -8256(%rdx), %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0xaa,0xc0,0xdf,0xff,0xff] + vfmsub213pd -8256(%rdx), %zmm10, %zmm5 + +// CHECK: vfmsub213pd 1016(%rdx){1to8}, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0x6a,0x7f] + vfmsub213pd 1016(%rdx){1to8}, %zmm10, %zmm5 + +// CHECK: vfmsub213pd 1024(%rdx){1to8}, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0xaa,0x00,0x04,0x00,0x00] + vfmsub213pd 1024(%rdx){1to8}, %zmm10, %zmm5 + +// CHECK: vfmsub213pd -1024(%rdx){1to8}, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0x6a,0x80] + vfmsub213pd -1024(%rdx){1to8}, %zmm10, %zmm5 + +// CHECK: 
vfmsub213pd -1032(%rdx){1to8}, %zmm10, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0xaa,0xf8,0xfb,0xff,0xff] + vfmsub213pd -1032(%rdx){1to8}, %zmm10, %zmm5 + +// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0x92,0x55,0x40,0xba,0xf3] + vfmsub231ps %zmm27, %zmm21, %zmm6 + +// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} +// CHECK: encoding: [0x62,0x92,0x55,0x43,0xba,0xf3] + vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} + +// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} {z} +// CHECK: encoding: [0x62,0x92,0x55,0xc3,0xba,0xf3] + vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} {z} + +// CHECK: vfmsub231ps {rn-sae}, %zmm27, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0x92,0x55,0x10,0xba,0xf3] + vfmsub231ps {rn-sae}, %zmm27, %zmm21, %zmm6 + +// CHECK: vfmsub231ps {ru-sae}, %zmm27, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0x92,0x55,0x50,0xba,0xf3] + vfmsub231ps {ru-sae}, %zmm27, %zmm21, %zmm6 + +// CHECK: vfmsub231ps {rd-sae}, %zmm27, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0x92,0x55,0x30,0xba,0xf3] + vfmsub231ps {rd-sae}, %zmm27, %zmm21, %zmm6 + +// CHECK: vfmsub231ps {rz-sae}, %zmm27, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0x92,0x55,0x70,0xba,0xf3] + vfmsub231ps {rz-sae}, %zmm27, %zmm21, %zmm6 + +// CHECK: vfmsub231ps (%rcx), %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0x31] + vfmsub231ps (%rcx), %zmm21, %zmm6 + +// CHECK: vfmsub231ps 291(%rax,%r14,8), %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xb2,0x55,0x40,0xba,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsub231ps 291(%rax,%r14,8), %zmm21, %zmm6 + +// CHECK: vfmsub231ps (%rcx){1to16}, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0x31] + vfmsub231ps (%rcx){1to16}, %zmm21, %zmm6 + +// CHECK: vfmsub231ps 8128(%rdx), %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0x72,0x7f] + vfmsub231ps 8128(%rdx), %zmm21, %zmm6 + +// CHECK: vfmsub231ps 8192(%rdx), %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0xb2,0x00,0x20,0x00,0x00] + vfmsub231ps 8192(%rdx), 
%zmm21, %zmm6 + +// CHECK: vfmsub231ps -8192(%rdx), %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0x72,0x80] + vfmsub231ps -8192(%rdx), %zmm21, %zmm6 + +// CHECK: vfmsub231ps -8256(%rdx), %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0xb2,0xc0,0xdf,0xff,0xff] + vfmsub231ps -8256(%rdx), %zmm21, %zmm6 + +// CHECK: vfmsub231ps 508(%rdx){1to16}, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0x72,0x7f] + vfmsub231ps 508(%rdx){1to16}, %zmm21, %zmm6 + +// CHECK: vfmsub231ps 512(%rdx){1to16}, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0xb2,0x00,0x02,0x00,0x00] + vfmsub231ps 512(%rdx){1to16}, %zmm21, %zmm6 + +// CHECK: vfmsub231ps -512(%rdx){1to16}, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0x72,0x80] + vfmsub231ps -512(%rdx){1to16}, %zmm21, %zmm6 + +// CHECK: vfmsub231ps -516(%rdx){1to16}, %zmm21, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0xb2,0xfc,0xfd,0xff,0xff] + vfmsub231ps -516(%rdx){1to16}, %zmm21, %zmm6 + +// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xd2,0x9d,0x48,0xba,0xeb] + vfmsub231pd %zmm11, %zmm12, %zmm5 + +// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} +// CHECK: encoding: [0x62,0xd2,0x9d,0x4a,0xba,0xeb] + vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} + +// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} {z} +// CHECK: encoding: [0x62,0xd2,0x9d,0xca,0xba,0xeb] + vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} {z} + +// CHECK: vfmsub231pd {rn-sae}, %zmm11, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xd2,0x9d,0x18,0xba,0xeb] + vfmsub231pd {rn-sae}, %zmm11, %zmm12, %zmm5 + +// CHECK: vfmsub231pd {ru-sae}, %zmm11, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xd2,0x9d,0x58,0xba,0xeb] + vfmsub231pd {ru-sae}, %zmm11, %zmm12, %zmm5 + +// CHECK: vfmsub231pd {rd-sae}, %zmm11, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xd2,0x9d,0x38,0xba,0xeb] + vfmsub231pd {rd-sae}, %zmm11, %zmm12, %zmm5 + +// CHECK: vfmsub231pd {rz-sae}, %zmm11, %zmm12, %zmm5 +// CHECK: encoding: 
[0x62,0xd2,0x9d,0x78,0xba,0xeb] + vfmsub231pd {rz-sae}, %zmm11, %zmm12, %zmm5 + +// CHECK: vfmsub231pd (%rcx), %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0x29] + vfmsub231pd (%rcx), %zmm12, %zmm5 + +// CHECK: vfmsub231pd 291(%rax,%r14,8), %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xb2,0x9d,0x48,0xba,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmsub231pd 291(%rax,%r14,8), %zmm12, %zmm5 + +// CHECK: vfmsub231pd (%rcx){1to8}, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0x29] + vfmsub231pd (%rcx){1to8}, %zmm12, %zmm5 + +// CHECK: vfmsub231pd 8128(%rdx), %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0x6a,0x7f] + vfmsub231pd 8128(%rdx), %zmm12, %zmm5 + +// CHECK: vfmsub231pd 8192(%rdx), %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0xaa,0x00,0x20,0x00,0x00] + vfmsub231pd 8192(%rdx), %zmm12, %zmm5 + +// CHECK: vfmsub231pd -8192(%rdx), %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0x6a,0x80] + vfmsub231pd -8192(%rdx), %zmm12, %zmm5 + +// CHECK: vfmsub231pd -8256(%rdx), %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0xaa,0xc0,0xdf,0xff,0xff] + vfmsub231pd -8256(%rdx), %zmm12, %zmm5 + +// CHECK: vfmsub231pd 1016(%rdx){1to8}, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0x6a,0x7f] + vfmsub231pd 1016(%rdx){1to8}, %zmm12, %zmm5 + +// CHECK: vfmsub231pd 1024(%rdx){1to8}, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0xaa,0x00,0x04,0x00,0x00] + vfmsub231pd 1024(%rdx){1to8}, %zmm12, %zmm5 + +// CHECK: vfmsub231pd -1024(%rdx){1to8}, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0x6a,0x80] + vfmsub231pd -1024(%rdx){1to8}, %zmm12, %zmm5 + +// CHECK: vfmsub231pd -1032(%rdx){1to8}, %zmm12, %zmm5 +// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0xaa,0xf8,0xfb,0xff,0xff] + vfmsub231pd -1032(%rdx){1to8}, %zmm12, %zmm5 + +// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x32,0x35,0x48,0x96,0xd4] + vfmaddsub132ps %zmm20, %zmm9, %zmm10 + +// 
CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} +// CHECK: encoding: [0x62,0x32,0x35,0x4b,0x96,0xd4] + vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} + +// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} {z} +// CHECK: encoding: [0x62,0x32,0x35,0xcb,0x96,0xd4] + vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} {z} + +// CHECK: vfmaddsub132ps {rn-sae}, %zmm20, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x32,0x35,0x18,0x96,0xd4] + vfmaddsub132ps {rn-sae}, %zmm20, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps {ru-sae}, %zmm20, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x32,0x35,0x58,0x96,0xd4] + vfmaddsub132ps {ru-sae}, %zmm20, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps {rd-sae}, %zmm20, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x32,0x35,0x38,0x96,0xd4] + vfmaddsub132ps {rd-sae}, %zmm20, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps {rz-sae}, %zmm20, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x32,0x35,0x78,0x96,0xd4] + vfmaddsub132ps {rz-sae}, %zmm20, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps (%rcx), %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x11] + vfmaddsub132ps (%rcx), %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x32,0x35,0x48,0x96,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub132ps 291(%rax,%r14,8), %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps (%rcx){1to16}, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x11] + vfmaddsub132ps (%rcx){1to16}, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps 8128(%rdx), %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x52,0x7f] + vfmaddsub132ps 8128(%rdx), %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps 8192(%rdx), %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x92,0x00,0x20,0x00,0x00] + vfmaddsub132ps 8192(%rdx), %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps -8192(%rdx), %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x52,0x80] + vfmaddsub132ps -8192(%rdx), %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps -8256(%rdx), %zmm9, %zmm10 +// CHECK: 
encoding: [0x62,0x72,0x35,0x48,0x96,0x92,0xc0,0xdf,0xff,0xff] + vfmaddsub132ps -8256(%rdx), %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps 508(%rdx){1to16}, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x52,0x7f] + vfmaddsub132ps 508(%rdx){1to16}, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps 512(%rdx){1to16}, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x92,0x00,0x02,0x00,0x00] + vfmaddsub132ps 512(%rdx){1to16}, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps -512(%rdx){1to16}, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x52,0x80] + vfmaddsub132ps -512(%rdx){1to16}, %zmm9, %zmm10 + +// CHECK: vfmaddsub132ps -516(%rdx){1to16}, %zmm9, %zmm10 +// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x92,0xfc,0xfd,0xff,0xff] + vfmaddsub132ps -516(%rdx){1to16}, %zmm9, %zmm10 + +// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xa2,0xb5,0x40,0x96,0xe5] + vfmaddsub132pd %zmm21, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} +// CHECK: encoding: [0x62,0xa2,0xb5,0x42,0x96,0xe5] + vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} + +// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0xb5,0xc2,0x96,0xe5] + vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} {z} + +// CHECK: vfmaddsub132pd {rn-sae}, %zmm21, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xa2,0xb5,0x10,0x96,0xe5] + vfmaddsub132pd {rn-sae}, %zmm21, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd {ru-sae}, %zmm21, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xa2,0xb5,0x50,0x96,0xe5] + vfmaddsub132pd {ru-sae}, %zmm21, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd {rd-sae}, %zmm21, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xa2,0xb5,0x30,0x96,0xe5] + vfmaddsub132pd {rd-sae}, %zmm21, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd {rz-sae}, %zmm21, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xa2,0xb5,0x70,0x96,0xe5] + vfmaddsub132pd {rz-sae}, %zmm21, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd (%rcx), %zmm25, %zmm20 +// CHECK: encoding: 
[0x62,0xe2,0xb5,0x40,0x96,0x21] + vfmaddsub132pd (%rcx), %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xa2,0xb5,0x40,0x96,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub132pd 291(%rax,%r14,8), %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd (%rcx){1to8}, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0x21] + vfmaddsub132pd (%rcx){1to8}, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd 8128(%rdx), %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0x62,0x7f] + vfmaddsub132pd 8128(%rdx), %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd 8192(%rdx), %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0xa2,0x00,0x20,0x00,0x00] + vfmaddsub132pd 8192(%rdx), %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd -8192(%rdx), %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0x62,0x80] + vfmaddsub132pd -8192(%rdx), %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd -8256(%rdx), %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0xa2,0xc0,0xdf,0xff,0xff] + vfmaddsub132pd -8256(%rdx), %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd 1016(%rdx){1to8}, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0x62,0x7f] + vfmaddsub132pd 1016(%rdx){1to8}, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd 1024(%rdx){1to8}, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0xa2,0x00,0x04,0x00,0x00] + vfmaddsub132pd 1024(%rdx){1to8}, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd -1024(%rdx){1to8}, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0x62,0x80] + vfmaddsub132pd -1024(%rdx){1to8}, %zmm25, %zmm20 + +// CHECK: vfmaddsub132pd -1032(%rdx){1to8}, %zmm25, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0xa2,0xf8,0xfb,0xff,0xff] + vfmaddsub132pd -1032(%rdx){1to8}, %zmm25, %zmm20 + +// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0x82,0x3d,0x40,0xa6,0xcc] + vfmaddsub213ps %zmm28, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} +// 
CHECK: encoding: [0x62,0x82,0x3d,0x46,0xa6,0xcc] + vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} + +// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0x3d,0xc6,0xa6,0xcc] + vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} {z} + +// CHECK: vfmaddsub213ps {rn-sae}, %zmm28, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0x82,0x3d,0x10,0xa6,0xcc] + vfmaddsub213ps {rn-sae}, %zmm28, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps {ru-sae}, %zmm28, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0x82,0x3d,0x50,0xa6,0xcc] + vfmaddsub213ps {ru-sae}, %zmm28, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps {rd-sae}, %zmm28, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0x82,0x3d,0x30,0xa6,0xcc] + vfmaddsub213ps {rd-sae}, %zmm28, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps {rz-sae}, %zmm28, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0x82,0x3d,0x70,0xa6,0xcc] + vfmaddsub213ps {rz-sae}, %zmm28, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps (%rcx), %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x09] + vfmaddsub213ps (%rcx), %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xa2,0x3d,0x40,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub213ps 291(%rax,%r14,8), %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps (%rcx){1to16}, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x09] + vfmaddsub213ps (%rcx){1to16}, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps 8128(%rdx), %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x4a,0x7f] + vfmaddsub213ps 8128(%rdx), %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps 8192(%rdx), %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x8a,0x00,0x20,0x00,0x00] + vfmaddsub213ps 8192(%rdx), %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps -8192(%rdx), %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x4a,0x80] + vfmaddsub213ps -8192(%rdx), %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps -8256(%rdx), %zmm24, %zmm17 +// CHECK: encoding: 
[0x62,0xe2,0x3d,0x40,0xa6,0x8a,0xc0,0xdf,0xff,0xff] + vfmaddsub213ps -8256(%rdx), %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps 508(%rdx){1to16}, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x4a,0x7f] + vfmaddsub213ps 508(%rdx){1to16}, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps 512(%rdx){1to16}, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x8a,0x00,0x02,0x00,0x00] + vfmaddsub213ps 512(%rdx){1to16}, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps -512(%rdx){1to16}, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x4a,0x80] + vfmaddsub213ps -512(%rdx){1to16}, %zmm24, %zmm17 + +// CHECK: vfmaddsub213ps -516(%rdx){1to16}, %zmm24, %zmm17 +// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x8a,0xfc,0xfd,0xff,0xff] + vfmaddsub213ps -516(%rdx){1to16}, %zmm24, %zmm17 + +// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x42,0xcd,0x48,0xa6,0xd2] + vfmaddsub213pd %zmm10, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} +// CHECK: encoding: [0x62,0x42,0xcd,0x4e,0xa6,0xd2] + vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} + +// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x42,0xcd,0xce,0xa6,0xd2] + vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} {z} + +// CHECK: vfmaddsub213pd {rn-sae}, %zmm10, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x42,0xcd,0x18,0xa6,0xd2] + vfmaddsub213pd {rn-sae}, %zmm10, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd {ru-sae}, %zmm10, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x42,0xcd,0x58,0xa6,0xd2] + vfmaddsub213pd {ru-sae}, %zmm10, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd {rd-sae}, %zmm10, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x42,0xcd,0x38,0xa6,0xd2] + vfmaddsub213pd {rd-sae}, %zmm10, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd {rz-sae}, %zmm10, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x42,0xcd,0x78,0xa6,0xd2] + vfmaddsub213pd {rz-sae}, %zmm10, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd (%rcx), %zmm6, %zmm26 +// CHECK: encoding: 
[0x62,0x62,0xcd,0x48,0xa6,0x11] + vfmaddsub213pd (%rcx), %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x22,0xcd,0x48,0xa6,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub213pd 291(%rax,%r14,8), %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd (%rcx){1to8}, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x11] + vfmaddsub213pd (%rcx){1to8}, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd 8128(%rdx), %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x52,0x7f] + vfmaddsub213pd 8128(%rdx), %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd 8192(%rdx), %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x92,0x00,0x20,0x00,0x00] + vfmaddsub213pd 8192(%rdx), %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd -8192(%rdx), %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x52,0x80] + vfmaddsub213pd -8192(%rdx), %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd -8256(%rdx), %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x92,0xc0,0xdf,0xff,0xff] + vfmaddsub213pd -8256(%rdx), %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd 1016(%rdx){1to8}, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x52,0x7f] + vfmaddsub213pd 1016(%rdx){1to8}, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd 1024(%rdx){1to8}, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x92,0x00,0x04,0x00,0x00] + vfmaddsub213pd 1024(%rdx){1to8}, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd -1024(%rdx){1to8}, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x52,0x80] + vfmaddsub213pd -1024(%rdx){1to8}, %zmm6, %zmm26 + +// CHECK: vfmaddsub213pd -1032(%rdx){1to8}, %zmm6, %zmm26 +// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x92,0xf8,0xfb,0xff,0xff] + vfmaddsub213pd -1032(%rdx){1to8}, %zmm6, %zmm26 + +// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x32,0x55,0x40,0xb6,0xfb] + vfmaddsub231ps %zmm19, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} +// CHECK: encoding: 
[0x62,0x32,0x55,0x46,0xb6,0xfb] + vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} + +// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} {z} +// CHECK: encoding: [0x62,0x32,0x55,0xc6,0xb6,0xfb] + vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} {z} + +// CHECK: vfmaddsub231ps {rn-sae}, %zmm19, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x32,0x55,0x10,0xb6,0xfb] + vfmaddsub231ps {rn-sae}, %zmm19, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps {ru-sae}, %zmm19, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x32,0x55,0x50,0xb6,0xfb] + vfmaddsub231ps {ru-sae}, %zmm19, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps {rd-sae}, %zmm19, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x32,0x55,0x30,0xb6,0xfb] + vfmaddsub231ps {rd-sae}, %zmm19, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps {rz-sae}, %zmm19, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x32,0x55,0x70,0xb6,0xfb] + vfmaddsub231ps {rz-sae}, %zmm19, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps (%rcx), %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0x39] + vfmaddsub231ps (%rcx), %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x32,0x55,0x40,0xb6,0xbc,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub231ps 291(%rax,%r14,8), %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps (%rcx){1to16}, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0x39] + vfmaddsub231ps (%rcx){1to16}, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps 8128(%rdx), %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0x7a,0x7f] + vfmaddsub231ps 8128(%rdx), %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps 8192(%rdx), %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0xba,0x00,0x20,0x00,0x00] + vfmaddsub231ps 8192(%rdx), %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps -8192(%rdx), %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0x7a,0x80] + vfmaddsub231ps -8192(%rdx), %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps -8256(%rdx), %zmm21, %zmm15 +// CHECK: encoding: 
[0x62,0x72,0x55,0x40,0xb6,0xba,0xc0,0xdf,0xff,0xff] + vfmaddsub231ps -8256(%rdx), %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps 508(%rdx){1to16}, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0x7a,0x7f] + vfmaddsub231ps 508(%rdx){1to16}, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps 512(%rdx){1to16}, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0xba,0x00,0x02,0x00,0x00] + vfmaddsub231ps 512(%rdx){1to16}, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps -512(%rdx){1to16}, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0x7a,0x80] + vfmaddsub231ps -512(%rdx){1to16}, %zmm21, %zmm15 + +// CHECK: vfmaddsub231ps -516(%rdx){1to16}, %zmm21, %zmm15 +// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0xba,0xfc,0xfd,0xff,0xff] + vfmaddsub231ps -516(%rdx){1to16}, %zmm21, %zmm15 + +// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x12,0xa5,0x40,0xb6,0xc8] + vfmaddsub231pd %zmm24, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} +// CHECK: encoding: [0x62,0x12,0xa5,0x47,0xb6,0xc8] + vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} + +// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} {z} +// CHECK: encoding: [0x62,0x12,0xa5,0xc7,0xb6,0xc8] + vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} {z} + +// CHECK: vfmaddsub231pd {rn-sae}, %zmm24, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x12,0xa5,0x10,0xb6,0xc8] + vfmaddsub231pd {rn-sae}, %zmm24, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd {ru-sae}, %zmm24, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x12,0xa5,0x50,0xb6,0xc8] + vfmaddsub231pd {ru-sae}, %zmm24, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd {rd-sae}, %zmm24, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x12,0xa5,0x30,0xb6,0xc8] + vfmaddsub231pd {rd-sae}, %zmm24, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd {rz-sae}, %zmm24, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x12,0xa5,0x70,0xb6,0xc8] + vfmaddsub231pd {rz-sae}, %zmm24, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd (%rcx), %zmm27, %zmm9 +// CHECK: encoding: 
[0x62,0x72,0xa5,0x40,0xb6,0x09] + vfmaddsub231pd (%rcx), %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x32,0xa5,0x40,0xb6,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub231pd 291(%rax,%r14,8), %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd (%rcx){1to8}, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x09] + vfmaddsub231pd (%rcx){1to8}, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd 8128(%rdx), %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x4a,0x7f] + vfmaddsub231pd 8128(%rdx), %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd 8192(%rdx), %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x8a,0x00,0x20,0x00,0x00] + vfmaddsub231pd 8192(%rdx), %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd -8192(%rdx), %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x4a,0x80] + vfmaddsub231pd -8192(%rdx), %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd -8256(%rdx), %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x8a,0xc0,0xdf,0xff,0xff] + vfmaddsub231pd -8256(%rdx), %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd 1016(%rdx){1to8}, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x4a,0x7f] + vfmaddsub231pd 1016(%rdx){1to8}, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd 1024(%rdx){1to8}, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x8a,0x00,0x04,0x00,0x00] + vfmaddsub231pd 1024(%rdx){1to8}, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd -1024(%rdx){1to8}, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x4a,0x80] + vfmaddsub231pd -1024(%rdx){1to8}, %zmm27, %zmm9 + +// CHECK: vfmaddsub231pd -1032(%rdx){1to8}, %zmm27, %zmm9 +// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x8a,0xf8,0xfb,0xff,0xff] + vfmaddsub231pd -1032(%rdx){1to8}, %zmm27, %zmm9 + +// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xb2,0x15,0x48,0x97,0xd5] + vfmsubadd132ps %zmm21, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} +// CHECK: encoding: 
[0x62,0xb2,0x15,0x4f,0x97,0xd5] + vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} + +// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xb2,0x15,0xcf,0x97,0xd5] + vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} {z} + +// CHECK: vfmsubadd132ps {rn-sae}, %zmm21, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xb2,0x15,0x18,0x97,0xd5] + vfmsubadd132ps {rn-sae}, %zmm21, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps {ru-sae}, %zmm21, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xb2,0x15,0x58,0x97,0xd5] + vfmsubadd132ps {ru-sae}, %zmm21, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps {rd-sae}, %zmm21, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xb2,0x15,0x38,0x97,0xd5] + vfmsubadd132ps {rd-sae}, %zmm21, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps {rz-sae}, %zmm21, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xb2,0x15,0x78,0x97,0xd5] + vfmsubadd132ps {rz-sae}, %zmm21, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps (%rcx), %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x11] + vfmsubadd132ps (%rcx), %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xb2,0x15,0x48,0x97,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd132ps 291(%rax,%r14,8), %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps (%rcx){1to16}, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x11] + vfmsubadd132ps (%rcx){1to16}, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps 8128(%rdx), %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x52,0x7f] + vfmsubadd132ps 8128(%rdx), %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps 8192(%rdx), %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x92,0x00,0x20,0x00,0x00] + vfmsubadd132ps 8192(%rdx), %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps -8192(%rdx), %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x52,0x80] + vfmsubadd132ps -8192(%rdx), %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps -8256(%rdx), %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x92,0xc0,0xdf,0xff,0xff] + vfmsubadd132ps 
-8256(%rdx), %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps 508(%rdx){1to16}, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x52,0x7f] + vfmsubadd132ps 508(%rdx){1to16}, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps 512(%rdx){1to16}, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x92,0x00,0x02,0x00,0x00] + vfmsubadd132ps 512(%rdx){1to16}, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps -512(%rdx){1to16}, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x52,0x80] + vfmsubadd132ps -512(%rdx){1to16}, %zmm13, %zmm2 + +// CHECK: vfmsubadd132ps -516(%rdx){1to16}, %zmm13, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x92,0xfc,0xfd,0xff,0xff] + vfmsubadd132ps -516(%rdx){1to16}, %zmm13, %zmm2 + +// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x9d,0x40,0x97,0xea] + vfmsubadd132pd %zmm18, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} +// CHECK: encoding: [0x62,0xa2,0x9d,0x47,0x97,0xea] + vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} + +// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x9d,0xc7,0x97,0xea] + vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} {z} + +// CHECK: vfmsubadd132pd {rn-sae}, %zmm18, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x9d,0x10,0x97,0xea] + vfmsubadd132pd {rn-sae}, %zmm18, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd {ru-sae}, %zmm18, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x9d,0x50,0x97,0xea] + vfmsubadd132pd {ru-sae}, %zmm18, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd {rd-sae}, %zmm18, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x9d,0x30,0x97,0xea] + vfmsubadd132pd {rd-sae}, %zmm18, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd {rz-sae}, %zmm18, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x9d,0x70,0x97,0xea] + vfmsubadd132pd {rz-sae}, %zmm18, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd (%rcx), %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0x29] + vfmsubadd132pd (%rcx), %zmm28, %zmm21 + +// 
CHECK: vfmsubadd132pd 291(%rax,%r14,8), %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x9d,0x40,0x97,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd132pd 291(%rax,%r14,8), %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd (%rcx){1to8}, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0x29] + vfmsubadd132pd (%rcx){1to8}, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd 8128(%rdx), %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0x6a,0x7f] + vfmsubadd132pd 8128(%rdx), %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd 8192(%rdx), %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0xaa,0x00,0x20,0x00,0x00] + vfmsubadd132pd 8192(%rdx), %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd -8192(%rdx), %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0x6a,0x80] + vfmsubadd132pd -8192(%rdx), %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd -8256(%rdx), %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0xaa,0xc0,0xdf,0xff,0xff] + vfmsubadd132pd -8256(%rdx), %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd 1016(%rdx){1to8}, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0x6a,0x7f] + vfmsubadd132pd 1016(%rdx){1to8}, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd 1024(%rdx){1to8}, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0xaa,0x00,0x04,0x00,0x00] + vfmsubadd132pd 1024(%rdx){1to8}, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd -1024(%rdx){1to8}, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0x6a,0x80] + vfmsubadd132pd -1024(%rdx){1to8}, %zmm28, %zmm21 + +// CHECK: vfmsubadd132pd -1032(%rdx){1to8}, %zmm28, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0xaa,0xf8,0xfb,0xff,0xff] + vfmsubadd132pd -1032(%rdx){1to8}, %zmm28, %zmm21 + +// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x1d,0x48,0xa7,0xf6] + vfmsubadd213ps %zmm14, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6} +// CHECK: encoding: [0x62,0xc2,0x1d,0x4e,0xa7,0xf6] + vfmsubadd213ps %zmm14, %zmm12, 
%zmm22 {%k6} + +// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6} {z} +// CHECK: encoding: [0x62,0xc2,0x1d,0xce,0xa7,0xf6] + vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6} {z} + +// CHECK: vfmsubadd213ps {rn-sae}, %zmm14, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x1d,0x18,0xa7,0xf6] + vfmsubadd213ps {rn-sae}, %zmm14, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps {ru-sae}, %zmm14, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x1d,0x58,0xa7,0xf6] + vfmsubadd213ps {ru-sae}, %zmm14, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps {rd-sae}, %zmm14, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x1d,0x38,0xa7,0xf6] + vfmsubadd213ps {rd-sae}, %zmm14, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps {rz-sae}, %zmm14, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x1d,0x78,0xa7,0xf6] + vfmsubadd213ps {rz-sae}, %zmm14, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps (%rcx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x31] + vfmsubadd213ps (%rcx), %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x1d,0x48,0xa7,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd213ps 291(%rax,%r14,8), %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps (%rcx){1to16}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x31] + vfmsubadd213ps (%rcx){1to16}, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps 8128(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x72,0x7f] + vfmsubadd213ps 8128(%rdx), %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps 8192(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0xb2,0x00,0x20,0x00,0x00] + vfmsubadd213ps 8192(%rdx), %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps -8192(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x72,0x80] + vfmsubadd213ps -8192(%rdx), %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps -8256(%rdx), %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0xb2,0xc0,0xdf,0xff,0xff] + vfmsubadd213ps -8256(%rdx), %zmm12, %zmm22 + +// CHECK: 
vfmsubadd213ps 508(%rdx){1to16}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x72,0x7f] + vfmsubadd213ps 508(%rdx){1to16}, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps 512(%rdx){1to16}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0xb2,0x00,0x02,0x00,0x00] + vfmsubadd213ps 512(%rdx){1to16}, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps -512(%rdx){1to16}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x72,0x80] + vfmsubadd213ps -512(%rdx){1to16}, %zmm12, %zmm22 + +// CHECK: vfmsubadd213ps -516(%rdx){1to16}, %zmm12, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0xb2,0xfc,0xfd,0xff,0xff] + vfmsubadd213ps -516(%rdx){1to16}, %zmm12, %zmm22 + +// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0xd2] + vfmsubadd213pd %zmm2, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} +// CHECK: encoding: [0x62,0xf2,0xc5,0x46,0xa7,0xd2] + vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} + +// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} {z} +// CHECK: encoding: [0x62,0xf2,0xc5,0xc6,0xa7,0xd2] + vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} {z} + +// CHECK: vfmsubadd213pd {rn-sae}, %zmm2, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x10,0xa7,0xd2] + vfmsubadd213pd {rn-sae}, %zmm2, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd {ru-sae}, %zmm2, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0xd2] + vfmsubadd213pd {ru-sae}, %zmm2, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd {rd-sae}, %zmm2, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x30,0xa7,0xd2] + vfmsubadd213pd {rd-sae}, %zmm2, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd {rz-sae}, %zmm2, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x70,0xa7,0xd2] + vfmsubadd213pd {rz-sae}, %zmm2, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd (%rcx), %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x11] + vfmsubadd213pd (%rcx), %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %zmm23, %zmm2 +// 
CHECK: encoding: [0x62,0xb2,0xc5,0x40,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd213pd 291(%rax,%r14,8), %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd (%rcx){1to8}, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x11] + vfmsubadd213pd (%rcx){1to8}, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd 8128(%rdx), %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x52,0x7f] + vfmsubadd213pd 8128(%rdx), %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd 8192(%rdx), %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x92,0x00,0x20,0x00,0x00] + vfmsubadd213pd 8192(%rdx), %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd -8192(%rdx), %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x52,0x80] + vfmsubadd213pd -8192(%rdx), %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd -8256(%rdx), %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x92,0xc0,0xdf,0xff,0xff] + vfmsubadd213pd -8256(%rdx), %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd 1016(%rdx){1to8}, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x52,0x7f] + vfmsubadd213pd 1016(%rdx){1to8}, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd 1024(%rdx){1to8}, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x92,0x00,0x04,0x00,0x00] + vfmsubadd213pd 1024(%rdx){1to8}, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd -1024(%rdx){1to8}, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x52,0x80] + vfmsubadd213pd -1024(%rdx){1to8}, %zmm23, %zmm2 + +// CHECK: vfmsubadd213pd -1032(%rdx){1to8}, %zmm23, %zmm2 +// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x92,0xf8,0xfb,0xff,0xff] + vfmsubadd213pd -1032(%rdx){1to8}, %zmm23, %zmm2 + +// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0xc1] + vfmsubadd231ps %zmm1, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} +// CHECK: encoding: [0x62,0x72,0x65,0x42,0xb7,0xc1] + vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} + +// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} {z} +// CHECK: 
encoding: [0x62,0x72,0x65,0xc2,0xb7,0xc1] + vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} {z} + +// CHECK: vfmsubadd231ps {rn-sae}, %zmm1, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x10,0xb7,0xc1] + vfmsubadd231ps {rn-sae}, %zmm1, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps {ru-sae}, %zmm1, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0xc1] + vfmsubadd231ps {ru-sae}, %zmm1, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps {rd-sae}, %zmm1, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x30,0xb7,0xc1] + vfmsubadd231ps {rd-sae}, %zmm1, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps {rz-sae}, %zmm1, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x70,0xb7,0xc1] + vfmsubadd231ps {rz-sae}, %zmm1, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps (%rcx), %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x01] + vfmsubadd231ps (%rcx), %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x32,0x65,0x40,0xb7,0x84,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd231ps 291(%rax,%r14,8), %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps (%rcx){1to16}, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x01] + vfmsubadd231ps (%rcx){1to16}, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps 8128(%rdx), %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x42,0x7f] + vfmsubadd231ps 8128(%rdx), %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps 8192(%rdx), %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x82,0x00,0x20,0x00,0x00] + vfmsubadd231ps 8192(%rdx), %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps -8192(%rdx), %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x42,0x80] + vfmsubadd231ps -8192(%rdx), %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps -8256(%rdx), %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x82,0xc0,0xdf,0xff,0xff] + vfmsubadd231ps -8256(%rdx), %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps 508(%rdx){1to16}, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x42,0x7f] + 
vfmsubadd231ps 508(%rdx){1to16}, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps 512(%rdx){1to16}, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x82,0x00,0x02,0x00,0x00] + vfmsubadd231ps 512(%rdx){1to16}, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps -512(%rdx){1to16}, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x42,0x80] + vfmsubadd231ps -512(%rdx){1to16}, %zmm19, %zmm8 + +// CHECK: vfmsubadd231ps -516(%rdx){1to16}, %zmm19, %zmm8 +// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x82,0xfc,0xfd,0xff,0xff] + vfmsubadd231ps -516(%rdx){1to16}, %zmm19, %zmm8 + +// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xa2,0xa5,0x40,0xb7,0xc5] + vfmsubadd231pd %zmm21, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} +// CHECK: encoding: [0x62,0xa2,0xa5,0x42,0xb7,0xc5] + vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} + +// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0xa5,0xc2,0xb7,0xc5] + vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} {z} + +// CHECK: vfmsubadd231pd {rn-sae}, %zmm21, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xa2,0xa5,0x10,0xb7,0xc5] + vfmsubadd231pd {rn-sae}, %zmm21, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd {ru-sae}, %zmm21, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xa2,0xa5,0x50,0xb7,0xc5] + vfmsubadd231pd {ru-sae}, %zmm21, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd {rd-sae}, %zmm21, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xa2,0xa5,0x30,0xb7,0xc5] + vfmsubadd231pd {rd-sae}, %zmm21, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd {rz-sae}, %zmm21, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xa2,0xa5,0x70,0xb7,0xc5] + vfmsubadd231pd {rz-sae}, %zmm21, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd (%rcx), %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x01] + vfmsubadd231pd (%rcx), %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xa2,0xa5,0x40,0xb7,0x84,0xf0,0x23,0x01,0x00,0x00] + 
vfmsubadd231pd 291(%rax,%r14,8), %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd (%rcx){1to8}, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x01] + vfmsubadd231pd (%rcx){1to8}, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd 8128(%rdx), %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x42,0x7f] + vfmsubadd231pd 8128(%rdx), %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd 8192(%rdx), %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x82,0x00,0x20,0x00,0x00] + vfmsubadd231pd 8192(%rdx), %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd -8192(%rdx), %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x42,0x80] + vfmsubadd231pd -8192(%rdx), %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd -8256(%rdx), %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x82,0xc0,0xdf,0xff,0xff] + vfmsubadd231pd -8256(%rdx), %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd 1016(%rdx){1to8}, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x42,0x7f] + vfmsubadd231pd 1016(%rdx){1to8}, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd 1024(%rdx){1to8}, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x82,0x00,0x04,0x00,0x00] + vfmsubadd231pd 1024(%rdx){1to8}, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd -1024(%rdx){1to8}, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x42,0x80] + vfmsubadd231pd -1024(%rdx){1to8}, %zmm27, %zmm16 + +// CHECK: vfmsubadd231pd -1032(%rdx){1to8}, %zmm27, %zmm16 +// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x82,0xf8,0xfb,0xff,0xff] + vfmsubadd231pd -1032(%rdx){1to8}, %zmm27, %zmm16 + +// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xc2,0x7d,0x40,0x9c,0xea] + vfnmadd132ps %zmm10, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5} +// CHECK: encoding: [0x62,0xc2,0x7d,0x45,0x9c,0xea] + vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5} + +// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5} {z} +// CHECK: encoding: [0x62,0xc2,0x7d,0xc5,0x9c,0xea] + vfnmadd132ps 
%zmm10, %zmm16, %zmm21 {%k5} {z} + +// CHECK: vfnmadd132ps {rn-sae}, %zmm10, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xc2,0x7d,0x10,0x9c,0xea] + vfnmadd132ps {rn-sae}, %zmm10, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps {ru-sae}, %zmm10, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xc2,0x7d,0x50,0x9c,0xea] + vfnmadd132ps {ru-sae}, %zmm10, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps {rd-sae}, %zmm10, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xc2,0x7d,0x30,0x9c,0xea] + vfnmadd132ps {rd-sae}, %zmm10, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps {rz-sae}, %zmm10, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xc2,0x7d,0x70,0x9c,0xea] + vfnmadd132ps {rz-sae}, %zmm10, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps (%rcx), %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x29] + vfnmadd132ps (%rcx), %zmm16, %zmm21 + +// CHECK: vfnmadd132ps 291(%rax,%r14,8), %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x40,0x9c,0xac,0xf0,0x23,0x01,0x00,0x00] + vfnmadd132ps 291(%rax,%r14,8), %zmm16, %zmm21 + +// CHECK: vfnmadd132ps (%rcx){1to16}, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x29] + vfnmadd132ps (%rcx){1to16}, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps 8128(%rdx), %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x6a,0x7f] + vfnmadd132ps 8128(%rdx), %zmm16, %zmm21 + +// CHECK: vfnmadd132ps 8192(%rdx), %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0xaa,0x00,0x20,0x00,0x00] + vfnmadd132ps 8192(%rdx), %zmm16, %zmm21 + +// CHECK: vfnmadd132ps -8192(%rdx), %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x6a,0x80] + vfnmadd132ps -8192(%rdx), %zmm16, %zmm21 + +// CHECK: vfnmadd132ps -8256(%rdx), %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0xaa,0xc0,0xdf,0xff,0xff] + vfnmadd132ps -8256(%rdx), %zmm16, %zmm21 + +// CHECK: vfnmadd132ps 508(%rdx){1to16}, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x6a,0x7f] + vfnmadd132ps 508(%rdx){1to16}, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps 
512(%rdx){1to16}, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0xaa,0x00,0x02,0x00,0x00] + vfnmadd132ps 512(%rdx){1to16}, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps -512(%rdx){1to16}, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x6a,0x80] + vfnmadd132ps -512(%rdx){1to16}, %zmm16, %zmm21 + +// CHECK: vfnmadd132ps -516(%rdx){1to16}, %zmm16, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0xaa,0xfc,0xfd,0xff,0xff] + vfnmadd132ps -516(%rdx){1to16}, %zmm16, %zmm21 + +// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0xe1] + vfnmadd132pd %zmm1, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} +// CHECK: encoding: [0x62,0x72,0x8d,0x4f,0x9c,0xe1] + vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} + +// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} {z} +// CHECK: encoding: [0x62,0x72,0x8d,0xcf,0x9c,0xe1] + vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} {z} + +// CHECK: vfnmadd132pd {rn-sae}, %zmm1, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x18,0x9c,0xe1] + vfnmadd132pd {rn-sae}, %zmm1, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd {ru-sae}, %zmm1, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0xe1] + vfnmadd132pd {ru-sae}, %zmm1, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd {rd-sae}, %zmm1, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x38,0x9c,0xe1] + vfnmadd132pd {rd-sae}, %zmm1, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd {rz-sae}, %zmm1, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x78,0x9c,0xe1] + vfnmadd132pd {rz-sae}, %zmm1, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd (%rcx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0x21] + vfnmadd132pd (%rcx), %zmm14, %zmm12 + +// CHECK: vfnmadd132pd 291(%rax,%r14,8), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x32,0x8d,0x48,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd132pd 291(%rax,%r14,8), %zmm14, %zmm12 + +// CHECK: vfnmadd132pd (%rcx){1to8}, %zmm14, %zmm12 +// CHECK: encoding: 
[0x62,0x72,0x8d,0x58,0x9c,0x21] + vfnmadd132pd (%rcx){1to8}, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd 8128(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0x62,0x7f] + vfnmadd132pd 8128(%rdx), %zmm14, %zmm12 + +// CHECK: vfnmadd132pd 8192(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0xa2,0x00,0x20,0x00,0x00] + vfnmadd132pd 8192(%rdx), %zmm14, %zmm12 + +// CHECK: vfnmadd132pd -8192(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0x62,0x80] + vfnmadd132pd -8192(%rdx), %zmm14, %zmm12 + +// CHECK: vfnmadd132pd -8256(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0xa2,0xc0,0xdf,0xff,0xff] + vfnmadd132pd -8256(%rdx), %zmm14, %zmm12 + +// CHECK: vfnmadd132pd 1016(%rdx){1to8}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0x62,0x7f] + vfnmadd132pd 1016(%rdx){1to8}, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd 1024(%rdx){1to8}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0xa2,0x00,0x04,0x00,0x00] + vfnmadd132pd 1024(%rdx){1to8}, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd -1024(%rdx){1to8}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0x62,0x80] + vfnmadd132pd -1024(%rdx){1to8}, %zmm14, %zmm12 + +// CHECK: vfnmadd132pd -1032(%rdx){1to8}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0xa2,0xf8,0xfb,0xff,0xff] + vfnmadd132pd -1032(%rdx){1to8}, %zmm14, %zmm12 + +// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0xd6] + vfnmadd213ps %zmm6, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} +// CHECK: encoding: [0x62,0x62,0x2d,0x4e,0xac,0xd6] + vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} + +// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x62,0x2d,0xce,0xac,0xd6] + vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} {z} + +// CHECK: vfnmadd213ps {rn-sae}, %zmm6, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x18,0xac,0xd6] + vfnmadd213ps {rn-sae}, 
%zmm6, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps {ru-sae}, %zmm6, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0xd6] + vfnmadd213ps {ru-sae}, %zmm6, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps {rd-sae}, %zmm6, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x38,0xac,0xd6] + vfnmadd213ps {rd-sae}, %zmm6, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps {rz-sae}, %zmm6, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x78,0xac,0xd6] + vfnmadd213ps {rz-sae}, %zmm6, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps (%rcx), %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x11] + vfnmadd213ps (%rcx), %zmm10, %zmm26 + +// CHECK: vfnmadd213ps 291(%rax,%r14,8), %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x22,0x2d,0x48,0xac,0x94,0xf0,0x23,0x01,0x00,0x00] + vfnmadd213ps 291(%rax,%r14,8), %zmm10, %zmm26 + +// CHECK: vfnmadd213ps (%rcx){1to16}, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x11] + vfnmadd213ps (%rcx){1to16}, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps 8128(%rdx), %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x52,0x7f] + vfnmadd213ps 8128(%rdx), %zmm10, %zmm26 + +// CHECK: vfnmadd213ps 8192(%rdx), %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x92,0x00,0x20,0x00,0x00] + vfnmadd213ps 8192(%rdx), %zmm10, %zmm26 + +// CHECK: vfnmadd213ps -8192(%rdx), %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x52,0x80] + vfnmadd213ps -8192(%rdx), %zmm10, %zmm26 + +// CHECK: vfnmadd213ps -8256(%rdx), %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x92,0xc0,0xdf,0xff,0xff] + vfnmadd213ps -8256(%rdx), %zmm10, %zmm26 + +// CHECK: vfnmadd213ps 508(%rdx){1to16}, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x52,0x7f] + vfnmadd213ps 508(%rdx){1to16}, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps 512(%rdx){1to16}, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x92,0x00,0x02,0x00,0x00] + vfnmadd213ps 512(%rdx){1to16}, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps 
-512(%rdx){1to16}, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x52,0x80] + vfnmadd213ps -512(%rdx){1to16}, %zmm10, %zmm26 + +// CHECK: vfnmadd213ps -516(%rdx){1to16}, %zmm10, %zmm26 +// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x92,0xfc,0xfd,0xff,0xff] + vfnmadd213ps -516(%rdx){1to16}, %zmm10, %zmm26 + +// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xc2,0xfd,0x40,0xac,0xc9] + vfnmadd213pd %zmm9, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} +// CHECK: encoding: [0x62,0xc2,0xfd,0x44,0xac,0xc9] + vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} + +// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} {z} +// CHECK: encoding: [0x62,0xc2,0xfd,0xc4,0xac,0xc9] + vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} {z} + +// CHECK: vfnmadd213pd {rn-sae}, %zmm9, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xc2,0xfd,0x10,0xac,0xc9] + vfnmadd213pd {rn-sae}, %zmm9, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd {ru-sae}, %zmm9, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xc2,0xfd,0x50,0xac,0xc9] + vfnmadd213pd {ru-sae}, %zmm9, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd {rd-sae}, %zmm9, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xc2,0xfd,0x30,0xac,0xc9] + vfnmadd213pd {rd-sae}, %zmm9, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd {rz-sae}, %zmm9, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xc2,0xfd,0x70,0xac,0xc9] + vfnmadd213pd {rz-sae}, %zmm9, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd (%rcx), %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x09] + vfnmadd213pd (%rcx), %zmm16, %zmm17 + +// CHECK: vfnmadd213pd 291(%rax,%r14,8), %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x40,0xac,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfnmadd213pd 291(%rax,%r14,8), %zmm16, %zmm17 + +// CHECK: vfnmadd213pd (%rcx){1to8}, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x09] + vfnmadd213pd (%rcx){1to8}, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd 8128(%rdx), %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x4a,0x7f] + 
vfnmadd213pd 8128(%rdx), %zmm16, %zmm17 + +// CHECK: vfnmadd213pd 8192(%rdx), %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x8a,0x00,0x20,0x00,0x00] + vfnmadd213pd 8192(%rdx), %zmm16, %zmm17 + +// CHECK: vfnmadd213pd -8192(%rdx), %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x4a,0x80] + vfnmadd213pd -8192(%rdx), %zmm16, %zmm17 + +// CHECK: vfnmadd213pd -8256(%rdx), %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x8a,0xc0,0xdf,0xff,0xff] + vfnmadd213pd -8256(%rdx), %zmm16, %zmm17 + +// CHECK: vfnmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x4a,0x7f] + vfnmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x8a,0x00,0x04,0x00,0x00] + vfnmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x4a,0x80] + vfnmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm17 + +// CHECK: vfnmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x8a,0xf8,0xfb,0xff,0xff] + vfnmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm17 + +// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x12,0x45,0x48,0xbc,0xf0] + vfnmadd231ps %zmm24, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} +// CHECK: encoding: [0x62,0x12,0x45,0x4d,0xbc,0xf0] + vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} + +// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} {z} +// CHECK: encoding: [0x62,0x12,0x45,0xcd,0xbc,0xf0] + vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} {z} + +// CHECK: vfnmadd231ps {rn-sae}, %zmm24, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x12,0x45,0x18,0xbc,0xf0] + vfnmadd231ps {rn-sae}, %zmm24, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps {ru-sae}, %zmm24, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x12,0x45,0x58,0xbc,0xf0] + vfnmadd231ps {ru-sae}, %zmm24, %zmm7, %zmm14 + +// 
CHECK: vfnmadd231ps {rd-sae}, %zmm24, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x12,0x45,0x38,0xbc,0xf0] + vfnmadd231ps {rd-sae}, %zmm24, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps {rz-sae}, %zmm24, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x12,0x45,0x78,0xbc,0xf0] + vfnmadd231ps {rz-sae}, %zmm24, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps (%rcx), %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0x31] + vfnmadd231ps (%rcx), %zmm7, %zmm14 + +// CHECK: vfnmadd231ps 291(%rax,%r14,8), %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x32,0x45,0x48,0xbc,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd231ps 291(%rax,%r14,8), %zmm7, %zmm14 + +// CHECK: vfnmadd231ps (%rcx){1to16}, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0x31] + vfnmadd231ps (%rcx){1to16}, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps 8128(%rdx), %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0x72,0x7f] + vfnmadd231ps 8128(%rdx), %zmm7, %zmm14 + +// CHECK: vfnmadd231ps 8192(%rdx), %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0xb2,0x00,0x20,0x00,0x00] + vfnmadd231ps 8192(%rdx), %zmm7, %zmm14 + +// CHECK: vfnmadd231ps -8192(%rdx), %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0x72,0x80] + vfnmadd231ps -8192(%rdx), %zmm7, %zmm14 + +// CHECK: vfnmadd231ps -8256(%rdx), %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0xb2,0xc0,0xdf,0xff,0xff] + vfnmadd231ps -8256(%rdx), %zmm7, %zmm14 + +// CHECK: vfnmadd231ps 508(%rdx){1to16}, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0x72,0x7f] + vfnmadd231ps 508(%rdx){1to16}, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps 512(%rdx){1to16}, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0xb2,0x00,0x02,0x00,0x00] + vfnmadd231ps 512(%rdx){1to16}, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps -512(%rdx){1to16}, %zmm7, %zmm14 +// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0x72,0x80] + vfnmadd231ps -512(%rdx){1to16}, %zmm7, %zmm14 + +// CHECK: vfnmadd231ps -516(%rdx){1to16}, %zmm7, %zmm14 +// CHECK: 
encoding: [0x62,0x72,0x45,0x58,0xbc,0xb2,0xfc,0xfd,0xff,0xff] + vfnmadd231ps -516(%rdx){1to16}, %zmm7, %zmm14 + +// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x32,0xa5,0x48,0xbc,0xe0] + vfnmadd231pd %zmm16, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} +// CHECK: encoding: [0x62,0x32,0xa5,0x4e,0xbc,0xe0] + vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} + +// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} {z} +// CHECK: encoding: [0x62,0x32,0xa5,0xce,0xbc,0xe0] + vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} {z} + +// CHECK: vfnmadd231pd {rn-sae}, %zmm16, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x32,0xa5,0x18,0xbc,0xe0] + vfnmadd231pd {rn-sae}, %zmm16, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd {ru-sae}, %zmm16, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x32,0xa5,0x58,0xbc,0xe0] + vfnmadd231pd {ru-sae}, %zmm16, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd {rd-sae}, %zmm16, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x32,0xa5,0x38,0xbc,0xe0] + vfnmadd231pd {rd-sae}, %zmm16, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd {rz-sae}, %zmm16, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x32,0xa5,0x78,0xbc,0xe0] + vfnmadd231pd {rz-sae}, %zmm16, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd (%rcx), %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0x21] + vfnmadd231pd (%rcx), %zmm11, %zmm12 + +// CHECK: vfnmadd231pd 291(%rax,%r14,8), %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x32,0xa5,0x48,0xbc,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd231pd 291(%rax,%r14,8), %zmm11, %zmm12 + +// CHECK: vfnmadd231pd (%rcx){1to8}, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0x21] + vfnmadd231pd (%rcx){1to8}, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd 8128(%rdx), %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0x62,0x7f] + vfnmadd231pd 8128(%rdx), %zmm11, %zmm12 + +// CHECK: vfnmadd231pd 8192(%rdx), %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0xa2,0x00,0x20,0x00,0x00] + vfnmadd231pd 8192(%rdx), %zmm11, %zmm12 
+ +// CHECK: vfnmadd231pd -8192(%rdx), %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0x62,0x80] + vfnmadd231pd -8192(%rdx), %zmm11, %zmm12 + +// CHECK: vfnmadd231pd -8256(%rdx), %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0xa2,0xc0,0xdf,0xff,0xff] + vfnmadd231pd -8256(%rdx), %zmm11, %zmm12 + +// CHECK: vfnmadd231pd 1016(%rdx){1to8}, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0x62,0x7f] + vfnmadd231pd 1016(%rdx){1to8}, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd 1024(%rdx){1to8}, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0xa2,0x00,0x04,0x00,0x00] + vfnmadd231pd 1024(%rdx){1to8}, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd -1024(%rdx){1to8}, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0x62,0x80] + vfnmadd231pd -1024(%rdx){1to8}, %zmm11, %zmm12 + +// CHECK: vfnmadd231pd -1032(%rdx){1to8}, %zmm11, %zmm12 +// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0xa2,0xf8,0xfb,0xff,0xff] + vfnmadd231pd -1032(%rdx){1to8}, %zmm11, %zmm12 + +// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xe6] + vfnmsub132ps %zmm6, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x42,0x9e,0xe6] + vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} + +// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} {z} +// CHECK: encoding: [0x62,0xf2,0x6d,0xc2,0x9e,0xe6] + vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} {z} + +// CHECK: vfnmsub132ps {rn-sae}, %zmm6, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x10,0x9e,0xe6] + vfnmsub132ps {rn-sae}, %zmm6, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps {ru-sae}, %zmm6, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xe6] + vfnmsub132ps {ru-sae}, %zmm6, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps {rd-sae}, %zmm6, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x30,0x9e,0xe6] + vfnmsub132ps {rd-sae}, %zmm6, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps {rz-sae}, %zmm6, %zmm18, %zmm4 +// CHECK: 
encoding: [0x62,0xf2,0x6d,0x70,0x9e,0xe6] + vfnmsub132ps {rz-sae}, %zmm6, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps (%rcx), %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x21] + vfnmsub132ps (%rcx), %zmm18, %zmm4 + +// CHECK: vfnmsub132ps 291(%rax,%r14,8), %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xb2,0x6d,0x40,0x9e,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmsub132ps 291(%rax,%r14,8), %zmm18, %zmm4 + +// CHECK: vfnmsub132ps (%rcx){1to16}, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x21] + vfnmsub132ps (%rcx){1to16}, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps 8128(%rdx), %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x62,0x7f] + vfnmsub132ps 8128(%rdx), %zmm18, %zmm4 + +// CHECK: vfnmsub132ps 8192(%rdx), %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xa2,0x00,0x20,0x00,0x00] + vfnmsub132ps 8192(%rdx), %zmm18, %zmm4 + +// CHECK: vfnmsub132ps -8192(%rdx), %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x62,0x80] + vfnmsub132ps -8192(%rdx), %zmm18, %zmm4 + +// CHECK: vfnmsub132ps -8256(%rdx), %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xa2,0xc0,0xdf,0xff,0xff] + vfnmsub132ps -8256(%rdx), %zmm18, %zmm4 + +// CHECK: vfnmsub132ps 508(%rdx){1to16}, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x62,0x7f] + vfnmsub132ps 508(%rdx){1to16}, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps 512(%rdx){1to16}, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xa2,0x00,0x02,0x00,0x00] + vfnmsub132ps 512(%rdx){1to16}, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps -512(%rdx){1to16}, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x62,0x80] + vfnmsub132ps -512(%rdx){1to16}, %zmm18, %zmm4 + +// CHECK: vfnmsub132ps -516(%rdx){1to16}, %zmm18, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xa2,0xfc,0xfd,0xff,0xff] + vfnmsub132ps -516(%rdx){1to16}, %zmm18, %zmm4 + +// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0xe6] + vfnmsub132pd 
%zmm6, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} +// CHECK: encoding: [0x62,0x62,0xd5,0x4a,0x9e,0xe6] + vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} + +// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} {z} +// CHECK: encoding: [0x62,0x62,0xd5,0xca,0x9e,0xe6] + vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} {z} + +// CHECK: vfnmsub132pd {rn-sae}, %zmm6, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x18,0x9e,0xe6] + vfnmsub132pd {rn-sae}, %zmm6, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd {ru-sae}, %zmm6, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0xe6] + vfnmsub132pd {ru-sae}, %zmm6, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd {rd-sae}, %zmm6, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x38,0x9e,0xe6] + vfnmsub132pd {rd-sae}, %zmm6, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd {rz-sae}, %zmm6, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x78,0x9e,0xe6] + vfnmsub132pd {rz-sae}, %zmm6, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd (%rcx), %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0x21] + vfnmsub132pd (%rcx), %zmm5, %zmm28 + +// CHECK: vfnmsub132pd 291(%rax,%r14,8), %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x22,0xd5,0x48,0x9e,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmsub132pd 291(%rax,%r14,8), %zmm5, %zmm28 + +// CHECK: vfnmsub132pd (%rcx){1to8}, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0x21] + vfnmsub132pd (%rcx){1to8}, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd 8128(%rdx), %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0x62,0x7f] + vfnmsub132pd 8128(%rdx), %zmm5, %zmm28 + +// CHECK: vfnmsub132pd 8192(%rdx), %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0xa2,0x00,0x20,0x00,0x00] + vfnmsub132pd 8192(%rdx), %zmm5, %zmm28 + +// CHECK: vfnmsub132pd -8192(%rdx), %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0x62,0x80] + vfnmsub132pd -8192(%rdx), %zmm5, %zmm28 + +// CHECK: vfnmsub132pd -8256(%rdx), %zmm5, %zmm28 +// CHECK: encoding: 
[0x62,0x62,0xd5,0x48,0x9e,0xa2,0xc0,0xdf,0xff,0xff] + vfnmsub132pd -8256(%rdx), %zmm5, %zmm28 + +// CHECK: vfnmsub132pd 1016(%rdx){1to8}, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0x62,0x7f] + vfnmsub132pd 1016(%rdx){1to8}, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd 1024(%rdx){1to8}, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0xa2,0x00,0x04,0x00,0x00] + vfnmsub132pd 1024(%rdx){1to8}, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd -1024(%rdx){1to8}, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0x62,0x80] + vfnmsub132pd -1024(%rdx){1to8}, %zmm5, %zmm28 + +// CHECK: vfnmsub132pd -1032(%rdx){1to8}, %zmm5, %zmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0xa2,0xf8,0xfb,0xff,0xff] + vfnmsub132pd -1032(%rdx){1to8}, %zmm5, %zmm28 + +// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0xea] + vfnmsub213ps %zmm2, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} +// CHECK: encoding: [0x62,0xe2,0x15,0x4b,0xae,0xea] + vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} + +// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} {z} +// CHECK: encoding: [0x62,0xe2,0x15,0xcb,0xae,0xea] + vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} {z} + +// CHECK: vfnmsub213ps {rn-sae}, %zmm2, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x18,0xae,0xea] + vfnmsub213ps {rn-sae}, %zmm2, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps {ru-sae}, %zmm2, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0xea] + vfnmsub213ps {ru-sae}, %zmm2, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps {rd-sae}, %zmm2, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x38,0xae,0xea] + vfnmsub213ps {rd-sae}, %zmm2, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps {rz-sae}, %zmm2, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x78,0xae,0xea] + vfnmsub213ps {rz-sae}, %zmm2, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps (%rcx), %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0x29] + vfnmsub213ps (%rcx), %zmm13, %zmm21 + 
+// CHECK: vfnmsub213ps 291(%rax,%r14,8), %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x15,0x48,0xae,0xac,0xf0,0x23,0x01,0x00,0x00] + vfnmsub213ps 291(%rax,%r14,8), %zmm13, %zmm21 + +// CHECK: vfnmsub213ps (%rcx){1to16}, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0x29] + vfnmsub213ps (%rcx){1to16}, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps 8128(%rdx), %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0x6a,0x7f] + vfnmsub213ps 8128(%rdx), %zmm13, %zmm21 + +// CHECK: vfnmsub213ps 8192(%rdx), %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0xaa,0x00,0x20,0x00,0x00] + vfnmsub213ps 8192(%rdx), %zmm13, %zmm21 + +// CHECK: vfnmsub213ps -8192(%rdx), %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0x6a,0x80] + vfnmsub213ps -8192(%rdx), %zmm13, %zmm21 + +// CHECK: vfnmsub213ps -8256(%rdx), %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0xaa,0xc0,0xdf,0xff,0xff] + vfnmsub213ps -8256(%rdx), %zmm13, %zmm21 + +// CHECK: vfnmsub213ps 508(%rdx){1to16}, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0x6a,0x7f] + vfnmsub213ps 508(%rdx){1to16}, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps 512(%rdx){1to16}, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0xaa,0x00,0x02,0x00,0x00] + vfnmsub213ps 512(%rdx){1to16}, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps -512(%rdx){1to16}, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0x6a,0x80] + vfnmsub213ps -512(%rdx){1to16}, %zmm13, %zmm21 + +// CHECK: vfnmsub213ps -516(%rdx){1to16}, %zmm13, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0xaa,0xfc,0xfd,0xff,0xff] + vfnmsub213ps -516(%rdx){1to16}, %zmm13, %zmm21 + +// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xc2,0xed,0x40,0xae,0xfb] + vfnmsub213pd %zmm11, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2} +// CHECK: encoding: [0x62,0xc2,0xed,0x42,0xae,0xfb] + vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2} + +// CHECK: vfnmsub213pd 
%zmm11, %zmm18, %zmm23 {%k2} {z} +// CHECK: encoding: [0x62,0xc2,0xed,0xc2,0xae,0xfb] + vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2} {z} + +// CHECK: vfnmsub213pd {rn-sae}, %zmm11, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xc2,0xed,0x10,0xae,0xfb] + vfnmsub213pd {rn-sae}, %zmm11, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd {ru-sae}, %zmm11, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xc2,0xed,0x50,0xae,0xfb] + vfnmsub213pd {ru-sae}, %zmm11, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd {rd-sae}, %zmm11, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xc2,0xed,0x30,0xae,0xfb] + vfnmsub213pd {rd-sae}, %zmm11, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd {rz-sae}, %zmm11, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xc2,0xed,0x70,0xae,0xfb] + vfnmsub213pd {rz-sae}, %zmm11, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd (%rcx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0x39] + vfnmsub213pd (%rcx), %zmm18, %zmm23 + +// CHECK: vfnmsub213pd 291(%rax,%r14,8), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xa2,0xed,0x40,0xae,0xbc,0xf0,0x23,0x01,0x00,0x00] + vfnmsub213pd 291(%rax,%r14,8), %zmm18, %zmm23 + +// CHECK: vfnmsub213pd (%rcx){1to8}, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0x39] + vfnmsub213pd (%rcx){1to8}, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd 8128(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0x7a,0x7f] + vfnmsub213pd 8128(%rdx), %zmm18, %zmm23 + +// CHECK: vfnmsub213pd 8192(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0xba,0x00,0x20,0x00,0x00] + vfnmsub213pd 8192(%rdx), %zmm18, %zmm23 + +// CHECK: vfnmsub213pd -8192(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0x7a,0x80] + vfnmsub213pd -8192(%rdx), %zmm18, %zmm23 + +// CHECK: vfnmsub213pd -8256(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0xba,0xc0,0xdf,0xff,0xff] + vfnmsub213pd -8256(%rdx), %zmm18, %zmm23 + +// CHECK: vfnmsub213pd 1016(%rdx){1to8}, %zmm18, %zmm23 +// CHECK: encoding: 
[0x62,0xe2,0xed,0x50,0xae,0x7a,0x7f] + vfnmsub213pd 1016(%rdx){1to8}, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd 1024(%rdx){1to8}, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0xba,0x00,0x04,0x00,0x00] + vfnmsub213pd 1024(%rdx){1to8}, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd -1024(%rdx){1to8}, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0x7a,0x80] + vfnmsub213pd -1024(%rdx){1to8}, %zmm18, %zmm23 + +// CHECK: vfnmsub213pd -1032(%rdx){1to8}, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0xba,0xf8,0xfb,0xff,0xff] + vfnmsub213pd -1032(%rdx){1to8}, %zmm18, %zmm23 + +// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x52,0x4d,0x48,0xbe,0xc5] + vfnmsub231ps %zmm13, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} +// CHECK: encoding: [0x62,0x52,0x4d,0x4a,0xbe,0xc5] + vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} + +// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} {z} +// CHECK: encoding: [0x62,0x52,0x4d,0xca,0xbe,0xc5] + vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} {z} + +// CHECK: vfnmsub231ps {rn-sae}, %zmm13, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x52,0x4d,0x18,0xbe,0xc5] + vfnmsub231ps {rn-sae}, %zmm13, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps {ru-sae}, %zmm13, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x52,0x4d,0x58,0xbe,0xc5] + vfnmsub231ps {ru-sae}, %zmm13, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps {rd-sae}, %zmm13, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x52,0x4d,0x38,0xbe,0xc5] + vfnmsub231ps {rd-sae}, %zmm13, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps {rz-sae}, %zmm13, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x52,0x4d,0x78,0xbe,0xc5] + vfnmsub231ps {rz-sae}, %zmm13, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps (%rcx), %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x01] + vfnmsub231ps (%rcx), %zmm6, %zmm8 + +// CHECK: vfnmsub231ps 291(%rax,%r14,8), %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x32,0x4d,0x48,0xbe,0x84,0xf0,0x23,0x01,0x00,0x00] + vfnmsub231ps 291(%rax,%r14,8), %zmm6, %zmm8 + 
+// CHECK: vfnmsub231ps (%rcx){1to16}, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x01] + vfnmsub231ps (%rcx){1to16}, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps 8128(%rdx), %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x42,0x7f] + vfnmsub231ps 8128(%rdx), %zmm6, %zmm8 + +// CHECK: vfnmsub231ps 8192(%rdx), %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x82,0x00,0x20,0x00,0x00] + vfnmsub231ps 8192(%rdx), %zmm6, %zmm8 + +// CHECK: vfnmsub231ps -8192(%rdx), %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x42,0x80] + vfnmsub231ps -8192(%rdx), %zmm6, %zmm8 + +// CHECK: vfnmsub231ps -8256(%rdx), %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x82,0xc0,0xdf,0xff,0xff] + vfnmsub231ps -8256(%rdx), %zmm6, %zmm8 + +// CHECK: vfnmsub231ps 508(%rdx){1to16}, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x42,0x7f] + vfnmsub231ps 508(%rdx){1to16}, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps 512(%rdx){1to16}, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x82,0x00,0x02,0x00,0x00] + vfnmsub231ps 512(%rdx){1to16}, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps -512(%rdx){1to16}, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x42,0x80] + vfnmsub231ps -512(%rdx){1to16}, %zmm6, %zmm8 + +// CHECK: vfnmsub231ps -516(%rdx){1to16}, %zmm6, %zmm8 +// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x82,0xfc,0xfd,0xff,0xff] + vfnmsub231ps -516(%rdx){1to16}, %zmm6, %zmm8 + +// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x02,0xdd,0x48,0xbe,0xe8] + vfnmsub231pd %zmm24, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} +// CHECK: encoding: [0x62,0x02,0xdd,0x4f,0xbe,0xe8] + vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} + +// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0xdd,0xcf,0xbe,0xe8] + vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} {z} + +// CHECK: vfnmsub231pd {rn-sae}, %zmm24, %zmm4, %zmm29 +// CHECK: encoding: 
[0x62,0x02,0xdd,0x18,0xbe,0xe8] + vfnmsub231pd {rn-sae}, %zmm24, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd {ru-sae}, %zmm24, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x02,0xdd,0x58,0xbe,0xe8] + vfnmsub231pd {ru-sae}, %zmm24, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd {rd-sae}, %zmm24, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x02,0xdd,0x38,0xbe,0xe8] + vfnmsub231pd {rd-sae}, %zmm24, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd {rz-sae}, %zmm24, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x02,0xdd,0x78,0xbe,0xe8] + vfnmsub231pd {rz-sae}, %zmm24, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd (%rcx), %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0x29] + vfnmsub231pd (%rcx), %zmm4, %zmm29 + +// CHECK: vfnmsub231pd 291(%rax,%r14,8), %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x22,0xdd,0x48,0xbe,0xac,0xf0,0x23,0x01,0x00,0x00] + vfnmsub231pd 291(%rax,%r14,8), %zmm4, %zmm29 + +// CHECK: vfnmsub231pd (%rcx){1to8}, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0x29] + vfnmsub231pd (%rcx){1to8}, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd 8128(%rdx), %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0x6a,0x7f] + vfnmsub231pd 8128(%rdx), %zmm4, %zmm29 + +// CHECK: vfnmsub231pd 8192(%rdx), %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0xaa,0x00,0x20,0x00,0x00] + vfnmsub231pd 8192(%rdx), %zmm4, %zmm29 + +// CHECK: vfnmsub231pd -8192(%rdx), %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0x6a,0x80] + vfnmsub231pd -8192(%rdx), %zmm4, %zmm29 + +// CHECK: vfnmsub231pd -8256(%rdx), %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0xaa,0xc0,0xdf,0xff,0xff] + vfnmsub231pd -8256(%rdx), %zmm4, %zmm29 + +// CHECK: vfnmsub231pd 1016(%rdx){1to8}, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0x6a,0x7f] + vfnmsub231pd 1016(%rdx){1to8}, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd 1024(%rdx){1to8}, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0xaa,0x00,0x04,0x00,0x00] + vfnmsub231pd 1024(%rdx){1to8}, %zmm4, 
%zmm29 + +// CHECK: vfnmsub231pd -1024(%rdx){1to8}, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0x6a,0x80] + vfnmsub231pd -1024(%rdx){1to8}, %zmm4, %zmm29 + +// CHECK: vfnmsub231pd -1032(%rdx){1to8}, %zmm4, %zmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0xaa,0xf8,0xfb,0xff,0xff] + vfnmsub231pd -1032(%rdx){1to8}, %zmm4, %zmm29 + + +// CHECK: vfmadd231ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0xb8,0x9a,0x00,0x20,0x00,0x00] + vfmadd231ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x1d,0x93,0xb8,0xd9] + vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd231ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0xb8,0x82,0xf8,0xfb,0xff,0xff] + vfmadd231ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z} + +// CHECK: vfmadd231pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0xb8,0x9a,0x00,0x20,0x00,0x00] + vfmadd231pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd231pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x9d,0x93,0xb8,0xd9] + vfmadd231pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd231pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0xb8,0x82,0xf8,0xfb,0xff,0xff] + vfmadd231pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z} + +// CHECK: vfmadd213ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0xa8,0x9a,0x00,0x20,0x00,0x00] + vfmadd213ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd213ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x1d,0x93,0xa8,0xd9] + vfmadd213ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd213ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0xa8,0x82,0xf8,0xfb,0xff,0xff] + 
vfmadd213ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z} + +// CHECK: vfmadd213pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0xa8,0x9a,0x00,0x20,0x00,0x00] + vfmadd213pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd213pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x9d,0x93,0xa8,0xd9] + vfmadd213pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd213pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0xa8,0x82,0xf8,0xfb,0xff,0xff] + vfmadd213pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z} + +// CHECK: vfmadd132ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0x98,0x9a,0x00,0x20,0x00,0x00] + vfmadd132ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd132ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x1d,0x93,0x98,0xd9] + vfmadd132ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd132ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0x98,0x82,0xf8,0xfb,0xff,0xff] + vfmadd132ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z} + +// CHECK: vfmadd132pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0x98,0x9a,0x00,0x20,0x00,0x00] + vfmadd132pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd132pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x9d,0x93,0x98,0xd9] + vfmadd132pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z} + +// CHECK: vfmadd132pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0x98,0x82,0xf8,0xfb,0xff,0xff] + vfmadd132pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z} + // CHECK: vpermi2d %zmm4, %zmm28, %zmm10 // CHECK: encoding: [0x62,0x72,0x1d,0x40,0x76,0xd4] vpermi2d %zmm4, %zmm28, %zmm10 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index 5d80beb081e..c587f8a75aa 100644 --- 
a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -11133,6 +11133,4038 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0xf8,0xfb,0xff,0xff,0x7b] valignq $0x7b, -1032(%rdx){1to4}, %ymm24, %ymm25 +// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x22,0x35,0x00,0x98,0xeb] + vfmadd132ps %xmm19, %xmm25, %xmm29 + +// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} +// CHECK: encoding: [0x62,0x22,0x35,0x04,0x98,0xeb] + vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} + +// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x35,0x84,0x98,0xeb] + vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} {z} + +// CHECK: vfmadd132ps (%rcx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0x29] + vfmadd132ps (%rcx), %xmm25, %xmm29 + +// CHECK: vfmadd132ps 291(%rax,%r14,8), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x22,0x35,0x00,0x98,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmadd132ps 291(%rax,%r14,8), %xmm25, %xmm29 + +// CHECK: vfmadd132ps (%rcx){1to4}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0x29] + vfmadd132ps (%rcx){1to4}, %xmm25, %xmm29 + +// CHECK: vfmadd132ps 2032(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0x6a,0x7f] + vfmadd132ps 2032(%rdx), %xmm25, %xmm29 + +// CHECK: vfmadd132ps 2048(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0xaa,0x00,0x08,0x00,0x00] + vfmadd132ps 2048(%rdx), %xmm25, %xmm29 + +// CHECK: vfmadd132ps -2048(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0x6a,0x80] + vfmadd132ps -2048(%rdx), %xmm25, %xmm29 + +// CHECK: vfmadd132ps -2064(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0xaa,0xf0,0xf7,0xff,0xff] + vfmadd132ps -2064(%rdx), %xmm25, %xmm29 + +// CHECK: vfmadd132ps 508(%rdx){1to4}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0x6a,0x7f] + vfmadd132ps 508(%rdx){1to4}, %xmm25, %xmm29 + +// CHECK: 
vfmadd132ps 512(%rdx){1to4}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0xaa,0x00,0x02,0x00,0x00] + vfmadd132ps 512(%rdx){1to4}, %xmm25, %xmm29 + +// CHECK: vfmadd132ps -512(%rdx){1to4}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0x6a,0x80] + vfmadd132ps -512(%rdx){1to4}, %xmm25, %xmm29 + +// CHECK: vfmadd132ps -516(%rdx){1to4}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0xaa,0xfc,0xfd,0xff,0xff] + vfmadd132ps -516(%rdx){1to4}, %xmm25, %xmm29 + +// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23 +// CHECK: encoding: [0x62,0x82,0x4d,0x20,0x98,0xfa] + vfmadd132ps %ymm26, %ymm22, %ymm23 + +// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} +// CHECK: encoding: [0x62,0x82,0x4d,0x25,0x98,0xfa] + vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} + +// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} {z} +// CHECK: encoding: [0x62,0x82,0x4d,0xa5,0x98,0xfa] + vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} {z} + +// CHECK: vfmadd132ps (%rcx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0x39] + vfmadd132ps (%rcx), %ymm22, %ymm23 + +// CHECK: vfmadd132ps 291(%rax,%r14,8), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x98,0xbc,0xf0,0x23,0x01,0x00,0x00] + vfmadd132ps 291(%rax,%r14,8), %ymm22, %ymm23 + +// CHECK: vfmadd132ps (%rcx){1to8}, %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0x39] + vfmadd132ps (%rcx){1to8}, %ymm22, %ymm23 + +// CHECK: vfmadd132ps 4064(%rdx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0x7a,0x7f] + vfmadd132ps 4064(%rdx), %ymm22, %ymm23 + +// CHECK: vfmadd132ps 4096(%rdx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0xba,0x00,0x10,0x00,0x00] + vfmadd132ps 4096(%rdx), %ymm22, %ymm23 + +// CHECK: vfmadd132ps -4096(%rdx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0x7a,0x80] + vfmadd132ps -4096(%rdx), %ymm22, %ymm23 + +// CHECK: vfmadd132ps -4128(%rdx), %ymm22, %ymm23 +// CHECK: encoding: 
[0x62,0xe2,0x4d,0x20,0x98,0xba,0xe0,0xef,0xff,0xff] + vfmadd132ps -4128(%rdx), %ymm22, %ymm23 + +// CHECK: vfmadd132ps 508(%rdx){1to8}, %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0x7a,0x7f] + vfmadd132ps 508(%rdx){1to8}, %ymm22, %ymm23 + +// CHECK: vfmadd132ps 512(%rdx){1to8}, %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0xba,0x00,0x02,0x00,0x00] + vfmadd132ps 512(%rdx){1to8}, %ymm22, %ymm23 + +// CHECK: vfmadd132ps -512(%rdx){1to8}, %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0x7a,0x80] + vfmadd132ps -512(%rdx){1to8}, %ymm22, %ymm23 + +// CHECK: vfmadd132ps -516(%rdx){1to8}, %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0xba,0xfc,0xfd,0xff,0xff] + vfmadd132ps -516(%rdx){1to8}, %ymm22, %ymm23 + +// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x02,0xd5,0x00,0x98,0xe3] + vfmadd132pd %xmm27, %xmm21, %xmm28 + +// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} +// CHECK: encoding: [0x62,0x02,0xd5,0x01,0x98,0xe3] + vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} + +// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} {z} +// CHECK: encoding: [0x62,0x02,0xd5,0x81,0x98,0xe3] + vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} {z} + +// CHECK: vfmadd132pd (%rcx), %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0x21] + vfmadd132pd (%rcx), %xmm21, %xmm28 + +// CHECK: vfmadd132pd 291(%rax,%r14,8), %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x22,0xd5,0x00,0x98,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmadd132pd 291(%rax,%r14,8), %xmm21, %xmm28 + +// CHECK: vfmadd132pd (%rcx){1to2}, %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0x21] + vfmadd132pd (%rcx){1to2}, %xmm21, %xmm28 + +// CHECK: vfmadd132pd 2032(%rdx), %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0x62,0x7f] + vfmadd132pd 2032(%rdx), %xmm21, %xmm28 + +// CHECK: vfmadd132pd 2048(%rdx), %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0xa2,0x00,0x08,0x00,0x00] + vfmadd132pd 2048(%rdx), %xmm21, 
%xmm28 + +// CHECK: vfmadd132pd -2048(%rdx), %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0x62,0x80] + vfmadd132pd -2048(%rdx), %xmm21, %xmm28 + +// CHECK: vfmadd132pd -2064(%rdx), %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0xa2,0xf0,0xf7,0xff,0xff] + vfmadd132pd -2064(%rdx), %xmm21, %xmm28 + +// CHECK: vfmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0x62,0x7f] + vfmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm28 + +// CHECK: vfmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0xa2,0x00,0x04,0x00,0x00] + vfmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm28 + +// CHECK: vfmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0x62,0x80] + vfmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm28 + +// CHECK: vfmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm28 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0xa2,0xf8,0xfb,0xff,0xff] + vfmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm28 + +// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22 +// CHECK: encoding: [0x62,0x82,0xbd,0x20,0x98,0xf3] + vfmadd132pd %ymm27, %ymm24, %ymm22 + +// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x82,0xbd,0x27,0x98,0xf3] + vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} + +// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0xbd,0xa7,0x98,0xf3] + vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} {z} + +// CHECK: vfmadd132pd (%rcx), %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0x31] + vfmadd132pd (%rcx), %ymm24, %ymm22 + +// CHECK: vfmadd132pd 291(%rax,%r14,8), %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xa2,0xbd,0x20,0x98,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmadd132pd 291(%rax,%r14,8), %ymm24, %ymm22 + +// CHECK: vfmadd132pd (%rcx){1to4}, %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0x31] + vfmadd132pd (%rcx){1to4}, %ymm24, %ymm22 + +// CHECK: vfmadd132pd 4064(%rdx), %ymm24, %ymm22 +// CHECK: 
encoding: [0x62,0xe2,0xbd,0x20,0x98,0x72,0x7f] + vfmadd132pd 4064(%rdx), %ymm24, %ymm22 + +// CHECK: vfmadd132pd 4096(%rdx), %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0xb2,0x00,0x10,0x00,0x00] + vfmadd132pd 4096(%rdx), %ymm24, %ymm22 + +// CHECK: vfmadd132pd -4096(%rdx), %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0x72,0x80] + vfmadd132pd -4096(%rdx), %ymm24, %ymm22 + +// CHECK: vfmadd132pd -4128(%rdx), %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0xb2,0xe0,0xef,0xff,0xff] + vfmadd132pd -4128(%rdx), %ymm24, %ymm22 + +// CHECK: vfmadd132pd 1016(%rdx){1to4}, %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0x72,0x7f] + vfmadd132pd 1016(%rdx){1to4}, %ymm24, %ymm22 + +// CHECK: vfmadd132pd 1024(%rdx){1to4}, %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0xb2,0x00,0x04,0x00,0x00] + vfmadd132pd 1024(%rdx){1to4}, %ymm24, %ymm22 + +// CHECK: vfmadd132pd -1024(%rdx){1to4}, %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0x72,0x80] + vfmadd132pd -1024(%rdx){1to4}, %ymm24, %ymm22 + +// CHECK: vfmadd132pd -1032(%rdx){1to4}, %ymm24, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0xb2,0xf8,0xfb,0xff,0xff] + vfmadd132pd -1032(%rdx){1to4}, %ymm24, %ymm22 + +// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x02,0x1d,0x00,0xa8,0xc4] + vfmadd213ps %xmm28, %xmm28, %xmm24 + +// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} +// CHECK: encoding: [0x62,0x02,0x1d,0x01,0xa8,0xc4] + vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} + +// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} {z} +// CHECK: encoding: [0x62,0x02,0x1d,0x81,0xa8,0xc4] + vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} {z} + +// CHECK: vfmadd213ps (%rcx), %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x01] + vfmadd213ps (%rcx), %xmm28, %xmm24 + +// CHECK: vfmadd213ps 291(%rax,%r14,8), %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x22,0x1d,0x00,0xa8,0x84,0xf0,0x23,0x01,0x00,0x00] + vfmadd213ps 
291(%rax,%r14,8), %xmm28, %xmm24 + +// CHECK: vfmadd213ps (%rcx){1to4}, %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x01] + vfmadd213ps (%rcx){1to4}, %xmm28, %xmm24 + +// CHECK: vfmadd213ps 2032(%rdx), %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x42,0x7f] + vfmadd213ps 2032(%rdx), %xmm28, %xmm24 + +// CHECK: vfmadd213ps 2048(%rdx), %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x82,0x00,0x08,0x00,0x00] + vfmadd213ps 2048(%rdx), %xmm28, %xmm24 + +// CHECK: vfmadd213ps -2048(%rdx), %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x42,0x80] + vfmadd213ps -2048(%rdx), %xmm28, %xmm24 + +// CHECK: vfmadd213ps -2064(%rdx), %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x82,0xf0,0xf7,0xff,0xff] + vfmadd213ps -2064(%rdx), %xmm28, %xmm24 + +// CHECK: vfmadd213ps 508(%rdx){1to4}, %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x42,0x7f] + vfmadd213ps 508(%rdx){1to4}, %xmm28, %xmm24 + +// CHECK: vfmadd213ps 512(%rdx){1to4}, %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x82,0x00,0x02,0x00,0x00] + vfmadd213ps 512(%rdx){1to4}, %xmm28, %xmm24 + +// CHECK: vfmadd213ps -512(%rdx){1to4}, %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x42,0x80] + vfmadd213ps -512(%rdx){1to4}, %xmm28, %xmm24 + +// CHECK: vfmadd213ps -516(%rdx){1to4}, %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x82,0xfc,0xfd,0xff,0xff] + vfmadd213ps -516(%rdx){1to4}, %xmm28, %xmm24 + +// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x22,0x4d,0x20,0xa8,0xd1] + vfmadd213ps %ymm17, %ymm22, %ymm26 + +// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} +// CHECK: encoding: [0x62,0x22,0x4d,0x23,0xa8,0xd1] + vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} + +// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} {z} +// CHECK: encoding: [0x62,0x22,0x4d,0xa3,0xa8,0xd1] + vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} {z} + +// CHECK: vfmadd213ps (%rcx), %ymm22, %ymm26 +// 
CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x11] + vfmadd213ps (%rcx), %ymm22, %ymm26 + +// CHECK: vfmadd213ps 291(%rax,%r14,8), %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x22,0x4d,0x20,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmadd213ps 291(%rax,%r14,8), %ymm22, %ymm26 + +// CHECK: vfmadd213ps (%rcx){1to8}, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x11] + vfmadd213ps (%rcx){1to8}, %ymm22, %ymm26 + +// CHECK: vfmadd213ps 4064(%rdx), %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x52,0x7f] + vfmadd213ps 4064(%rdx), %ymm22, %ymm26 + +// CHECK: vfmadd213ps 4096(%rdx), %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x92,0x00,0x10,0x00,0x00] + vfmadd213ps 4096(%rdx), %ymm22, %ymm26 + +// CHECK: vfmadd213ps -4096(%rdx), %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x52,0x80] + vfmadd213ps -4096(%rdx), %ymm22, %ymm26 + +// CHECK: vfmadd213ps -4128(%rdx), %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x92,0xe0,0xef,0xff,0xff] + vfmadd213ps -4128(%rdx), %ymm22, %ymm26 + +// CHECK: vfmadd213ps 508(%rdx){1to8}, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x52,0x7f] + vfmadd213ps 508(%rdx){1to8}, %ymm22, %ymm26 + +// CHECK: vfmadd213ps 512(%rdx){1to8}, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x92,0x00,0x02,0x00,0x00] + vfmadd213ps 512(%rdx){1to8}, %ymm22, %ymm26 + +// CHECK: vfmadd213ps -512(%rdx){1to8}, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x52,0x80] + vfmadd213ps -512(%rdx){1to8}, %ymm22, %ymm26 + +// CHECK: vfmadd213ps -516(%rdx){1to8}, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x92,0xfc,0xfd,0xff,0xff] + vfmadd213ps -516(%rdx){1to8}, %ymm22, %ymm26 + +// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0xa8,0xf7] + vfmadd213pd %xmm23, %xmm21, %xmm22 + +// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4} +// CHECK: encoding: [0x62,0xa2,0xd5,0x04,0xa8,0xf7] + vfmadd213pd 
%xmm23, %xmm21, %xmm22 {%k4} + +// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0xd5,0x84,0xa8,0xf7] + vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4} {z} + +// CHECK: vfmadd213pd (%rcx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x31] + vfmadd213pd (%rcx), %xmm21, %xmm22 + +// CHECK: vfmadd213pd 291(%rax,%r14,8), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0xa8,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmadd213pd 291(%rax,%r14,8), %xmm21, %xmm22 + +// CHECK: vfmadd213pd (%rcx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x31] + vfmadd213pd (%rcx){1to2}, %xmm21, %xmm22 + +// CHECK: vfmadd213pd 2032(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x72,0x7f] + vfmadd213pd 2032(%rdx), %xmm21, %xmm22 + +// CHECK: vfmadd213pd 2048(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0xb2,0x00,0x08,0x00,0x00] + vfmadd213pd 2048(%rdx), %xmm21, %xmm22 + +// CHECK: vfmadd213pd -2048(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x72,0x80] + vfmadd213pd -2048(%rdx), %xmm21, %xmm22 + +// CHECK: vfmadd213pd -2064(%rdx), %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0xb2,0xf0,0xf7,0xff,0xff] + vfmadd213pd -2064(%rdx), %xmm21, %xmm22 + +// CHECK: vfmadd213pd 1016(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x72,0x7f] + vfmadd213pd 1016(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: vfmadd213pd 1024(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0xb2,0x00,0x04,0x00,0x00] + vfmadd213pd 1024(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: vfmadd213pd -1024(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x72,0x80] + vfmadd213pd -1024(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: vfmadd213pd -1032(%rdx){1to2}, %xmm21, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0xb2,0xf8,0xfb,0xff,0xff] + vfmadd213pd -1032(%rdx){1to2}, %xmm21, %xmm22 + +// CHECK: 
vfmadd213pd %ymm17, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xa2,0xe5,0x20,0xa8,0xd1] + vfmadd213pd %ymm17, %ymm19, %ymm18 + +// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} +// CHECK: encoding: [0x62,0xa2,0xe5,0x21,0xa8,0xd1] + vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} + +// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0xe5,0xa1,0xa8,0xd1] + vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} {z} + +// CHECK: vfmadd213pd (%rcx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x11] + vfmadd213pd (%rcx), %ymm19, %ymm18 + +// CHECK: vfmadd213pd 291(%rax,%r14,8), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xa2,0xe5,0x20,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmadd213pd 291(%rax,%r14,8), %ymm19, %ymm18 + +// CHECK: vfmadd213pd (%rcx){1to4}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x11] + vfmadd213pd (%rcx){1to4}, %ymm19, %ymm18 + +// CHECK: vfmadd213pd 4064(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x52,0x7f] + vfmadd213pd 4064(%rdx), %ymm19, %ymm18 + +// CHECK: vfmadd213pd 4096(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x92,0x00,0x10,0x00,0x00] + vfmadd213pd 4096(%rdx), %ymm19, %ymm18 + +// CHECK: vfmadd213pd -4096(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x52,0x80] + vfmadd213pd -4096(%rdx), %ymm19, %ymm18 + +// CHECK: vfmadd213pd -4128(%rdx), %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x92,0xe0,0xef,0xff,0xff] + vfmadd213pd -4128(%rdx), %ymm19, %ymm18 + +// CHECK: vfmadd213pd 1016(%rdx){1to4}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x52,0x7f] + vfmadd213pd 1016(%rdx){1to4}, %ymm19, %ymm18 + +// CHECK: vfmadd213pd 1024(%rdx){1to4}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x92,0x00,0x04,0x00,0x00] + vfmadd213pd 1024(%rdx){1to4}, %ymm19, %ymm18 + +// CHECK: vfmadd213pd -1024(%rdx){1to4}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x52,0x80] 
+ vfmadd213pd -1024(%rdx){1to4}, %ymm19, %ymm18 + +// CHECK: vfmadd213pd -1032(%rdx){1to4}, %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x92,0xf8,0xfb,0xff,0xff] + vfmadd213pd -1032(%rdx){1to4}, %ymm19, %ymm18 + +// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x02,0x5d,0x00,0xb8,0xf3] + vfmadd231ps %xmm27, %xmm20, %xmm30 + +// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} +// CHECK: encoding: [0x62,0x02,0x5d,0x07,0xb8,0xf3] + vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} + +// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0x5d,0x87,0xb8,0xf3] + vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} {z} + +// CHECK: vfmadd231ps (%rcx), %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0x31] + vfmadd231ps (%rcx), %xmm20, %xmm30 + +// CHECK: vfmadd231ps 291(%rax,%r14,8), %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x22,0x5d,0x00,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmadd231ps 291(%rax,%r14,8), %xmm20, %xmm30 + +// CHECK: vfmadd231ps (%rcx){1to4}, %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0x31] + vfmadd231ps (%rcx){1to4}, %xmm20, %xmm30 + +// CHECK: vfmadd231ps 2032(%rdx), %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0x72,0x7f] + vfmadd231ps 2032(%rdx), %xmm20, %xmm30 + +// CHECK: vfmadd231ps 2048(%rdx), %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0xb2,0x00,0x08,0x00,0x00] + vfmadd231ps 2048(%rdx), %xmm20, %xmm30 + +// CHECK: vfmadd231ps -2048(%rdx), %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0x72,0x80] + vfmadd231ps -2048(%rdx), %xmm20, %xmm30 + +// CHECK: vfmadd231ps -2064(%rdx), %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0xb2,0xf0,0xf7,0xff,0xff] + vfmadd231ps -2064(%rdx), %xmm20, %xmm30 + +// CHECK: vfmadd231ps 508(%rdx){1to4}, %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0x72,0x7f] + vfmadd231ps 508(%rdx){1to4}, %xmm20, %xmm30 + +// CHECK: vfmadd231ps 512(%rdx){1to4}, %xmm20, 
%xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0xb2,0x00,0x02,0x00,0x00] + vfmadd231ps 512(%rdx){1to4}, %xmm20, %xmm30 + +// CHECK: vfmadd231ps -512(%rdx){1to4}, %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0x72,0x80] + vfmadd231ps -512(%rdx){1to4}, %xmm20, %xmm30 + +// CHECK: vfmadd231ps -516(%rdx){1to4}, %xmm20, %xmm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0xb2,0xfc,0xfd,0xff,0xff] + vfmadd231ps -516(%rdx){1to4}, %xmm20, %xmm30 + +// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22 +// CHECK: encoding: [0x62,0x82,0x2d,0x20,0xb8,0xf1] + vfmadd231ps %ymm25, %ymm26, %ymm22 + +// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x82,0x2d,0x27,0xb8,0xf1] + vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} + +// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x2d,0xa7,0xb8,0xf1] + vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} {z} + +// CHECK: vfmadd231ps (%rcx), %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x31] + vfmadd231ps (%rcx), %ymm26, %ymm22 + +// CHECK: vfmadd231ps 291(%rax,%r14,8), %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmadd231ps 291(%rax,%r14,8), %ymm26, %ymm22 + +// CHECK: vfmadd231ps (%rcx){1to8}, %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x31] + vfmadd231ps (%rcx){1to8}, %ymm26, %ymm22 + +// CHECK: vfmadd231ps 4064(%rdx), %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x72,0x7f] + vfmadd231ps 4064(%rdx), %ymm26, %ymm22 + +// CHECK: vfmadd231ps 4096(%rdx), %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0xb2,0x00,0x10,0x00,0x00] + vfmadd231ps 4096(%rdx), %ymm26, %ymm22 + +// CHECK: vfmadd231ps -4096(%rdx), %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x72,0x80] + vfmadd231ps -4096(%rdx), %ymm26, %ymm22 + +// CHECK: vfmadd231ps -4128(%rdx), %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0xb2,0xe0,0xef,0xff,0xff] + vfmadd231ps 
-4128(%rdx), %ymm26, %ymm22 + +// CHECK: vfmadd231ps 508(%rdx){1to8}, %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x72,0x7f] + vfmadd231ps 508(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vfmadd231ps 512(%rdx){1to8}, %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0xb2,0x00,0x02,0x00,0x00] + vfmadd231ps 512(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vfmadd231ps -512(%rdx){1to8}, %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x72,0x80] + vfmadd231ps -512(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vfmadd231ps -516(%rdx){1to8}, %ymm26, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0xb2,0xfc,0xfd,0xff,0xff] + vfmadd231ps -516(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x02,0xdd,0x00,0xb8,0xe8] + vfmadd231pd %xmm24, %xmm20, %xmm29 + +// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} +// CHECK: encoding: [0x62,0x02,0xdd,0x07,0xb8,0xe8] + vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} + +// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0xdd,0x87,0xb8,0xe8] + vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} {z} + +// CHECK: vfmadd231pd (%rcx), %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0x29] + vfmadd231pd (%rcx), %xmm20, %xmm29 + +// CHECK: vfmadd231pd 291(%rax,%r14,8), %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x22,0xdd,0x00,0xb8,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmadd231pd 291(%rax,%r14,8), %xmm20, %xmm29 + +// CHECK: vfmadd231pd (%rcx){1to2}, %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0x29] + vfmadd231pd (%rcx){1to2}, %xmm20, %xmm29 + +// CHECK: vfmadd231pd 2032(%rdx), %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0x6a,0x7f] + vfmadd231pd 2032(%rdx), %xmm20, %xmm29 + +// CHECK: vfmadd231pd 2048(%rdx), %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0xaa,0x00,0x08,0x00,0x00] + vfmadd231pd 2048(%rdx), %xmm20, %xmm29 + +// CHECK: vfmadd231pd -2048(%rdx), %xmm20, %xmm29 +// 
CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0x6a,0x80] + vfmadd231pd -2048(%rdx), %xmm20, %xmm29 + +// CHECK: vfmadd231pd -2064(%rdx), %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0xaa,0xf0,0xf7,0xff,0xff] + vfmadd231pd -2064(%rdx), %xmm20, %xmm29 + +// CHECK: vfmadd231pd 1016(%rdx){1to2}, %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0x6a,0x7f] + vfmadd231pd 1016(%rdx){1to2}, %xmm20, %xmm29 + +// CHECK: vfmadd231pd 1024(%rdx){1to2}, %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0xaa,0x00,0x04,0x00,0x00] + vfmadd231pd 1024(%rdx){1to2}, %xmm20, %xmm29 + +// CHECK: vfmadd231pd -1024(%rdx){1to2}, %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0x6a,0x80] + vfmadd231pd -1024(%rdx){1to2}, %xmm20, %xmm29 + +// CHECK: vfmadd231pd -1032(%rdx){1to2}, %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0xaa,0xf8,0xfb,0xff,0xff] + vfmadd231pd -1032(%rdx){1to2}, %xmm20, %xmm29 + +// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x02,0xdd,0x20,0xb8,0xc2] + vfmadd231pd %ymm26, %ymm20, %ymm24 + +// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} +// CHECK: encoding: [0x62,0x02,0xdd,0x26,0xb8,0xc2] + vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} + +// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} {z} +// CHECK: encoding: [0x62,0x02,0xdd,0xa6,0xb8,0xc2] + vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} {z} + +// CHECK: vfmadd231pd (%rcx), %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x01] + vfmadd231pd (%rcx), %ymm20, %ymm24 + +// CHECK: vfmadd231pd 291(%rax,%r14,8), %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x22,0xdd,0x20,0xb8,0x84,0xf0,0x23,0x01,0x00,0x00] + vfmadd231pd 291(%rax,%r14,8), %ymm20, %ymm24 + +// CHECK: vfmadd231pd (%rcx){1to4}, %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x01] + vfmadd231pd (%rcx){1to4}, %ymm20, %ymm24 + +// CHECK: vfmadd231pd 4064(%rdx), %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x42,0x7f] + vfmadd231pd 
4064(%rdx), %ymm20, %ymm24 + +// CHECK: vfmadd231pd 4096(%rdx), %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x82,0x00,0x10,0x00,0x00] + vfmadd231pd 4096(%rdx), %ymm20, %ymm24 + +// CHECK: vfmadd231pd -4096(%rdx), %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x42,0x80] + vfmadd231pd -4096(%rdx), %ymm20, %ymm24 + +// CHECK: vfmadd231pd -4128(%rdx), %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x82,0xe0,0xef,0xff,0xff] + vfmadd231pd -4128(%rdx), %ymm20, %ymm24 + +// CHECK: vfmadd231pd 1016(%rdx){1to4}, %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x42,0x7f] + vfmadd231pd 1016(%rdx){1to4}, %ymm20, %ymm24 + +// CHECK: vfmadd231pd 1024(%rdx){1to4}, %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x82,0x00,0x04,0x00,0x00] + vfmadd231pd 1024(%rdx){1to4}, %ymm20, %ymm24 + +// CHECK: vfmadd231pd -1024(%rdx){1to4}, %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x42,0x80] + vfmadd231pd -1024(%rdx){1to4}, %ymm20, %ymm24 + +// CHECK: vfmadd231pd -1032(%rdx){1to4}, %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x82,0xf8,0xfb,0xff,0xff] + vfmadd231pd -1032(%rdx){1to4}, %ymm20, %ymm24 + +// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xa2,0x6d,0x00,0x9a,0xcd] + vfmsub132ps %xmm21, %xmm18, %xmm17 + +// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} +// CHECK: encoding: [0x62,0xa2,0x6d,0x01,0x9a,0xcd] + vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} + +// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x6d,0x81,0x9a,0xcd] + vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} {z} + +// CHECK: vfmsub132ps (%rcx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x09] + vfmsub132ps (%rcx), %xmm18, %xmm17 + +// CHECK: vfmsub132ps 291(%rax,%r14,8), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xa2,0x6d,0x00,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmsub132ps 291(%rax,%r14,8), %xmm18, %xmm17 + +// CHECK: vfmsub132ps 
(%rcx){1to4}, %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x09] + vfmsub132ps (%rcx){1to4}, %xmm18, %xmm17 + +// CHECK: vfmsub132ps 2032(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x4a,0x7f] + vfmsub132ps 2032(%rdx), %xmm18, %xmm17 + +// CHECK: vfmsub132ps 2048(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x8a,0x00,0x08,0x00,0x00] + vfmsub132ps 2048(%rdx), %xmm18, %xmm17 + +// CHECK: vfmsub132ps -2048(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x4a,0x80] + vfmsub132ps -2048(%rdx), %xmm18, %xmm17 + +// CHECK: vfmsub132ps -2064(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x8a,0xf0,0xf7,0xff,0xff] + vfmsub132ps -2064(%rdx), %xmm18, %xmm17 + +// CHECK: vfmsub132ps 508(%rdx){1to4}, %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x4a,0x7f] + vfmsub132ps 508(%rdx){1to4}, %xmm18, %xmm17 + +// CHECK: vfmsub132ps 512(%rdx){1to4}, %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x8a,0x00,0x02,0x00,0x00] + vfmsub132ps 512(%rdx){1to4}, %xmm18, %xmm17 + +// CHECK: vfmsub132ps -512(%rdx){1to4}, %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x4a,0x80] + vfmsub132ps -512(%rdx){1to4}, %xmm18, %xmm17 + +// CHECK: vfmsub132ps -516(%rdx){1to4}, %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x8a,0xfc,0xfd,0xff,0xff] + vfmsub132ps -516(%rdx){1to4}, %xmm18, %xmm17 + +// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x9a,0xcf] + vfmsub132ps %ymm23, %ymm28, %ymm25 + +// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} +// CHECK: encoding: [0x62,0x22,0x1d,0x25,0x9a,0xcf] + vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} + +// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0x1d,0xa5,0x9a,0xcf] + vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} {z} + +// CHECK: vfmsub132ps (%rcx), %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x09] + 
vfmsub132ps (%rcx), %ymm28, %ymm25 + +// CHECK: vfmsub132ps 291(%rax,%r14,8), %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmsub132ps 291(%rax,%r14,8), %ymm28, %ymm25 + +// CHECK: vfmsub132ps (%rcx){1to8}, %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x09] + vfmsub132ps (%rcx){1to8}, %ymm28, %ymm25 + +// CHECK: vfmsub132ps 4064(%rdx), %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x4a,0x7f] + vfmsub132ps 4064(%rdx), %ymm28, %ymm25 + +// CHECK: vfmsub132ps 4096(%rdx), %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x8a,0x00,0x10,0x00,0x00] + vfmsub132ps 4096(%rdx), %ymm28, %ymm25 + +// CHECK: vfmsub132ps -4096(%rdx), %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x4a,0x80] + vfmsub132ps -4096(%rdx), %ymm28, %ymm25 + +// CHECK: vfmsub132ps -4128(%rdx), %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x8a,0xe0,0xef,0xff,0xff] + vfmsub132ps -4128(%rdx), %ymm28, %ymm25 + +// CHECK: vfmsub132ps 508(%rdx){1to8}, %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x4a,0x7f] + vfmsub132ps 508(%rdx){1to8}, %ymm28, %ymm25 + +// CHECK: vfmsub132ps 512(%rdx){1to8}, %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x8a,0x00,0x02,0x00,0x00] + vfmsub132ps 512(%rdx){1to8}, %ymm28, %ymm25 + +// CHECK: vfmsub132ps -512(%rdx){1to8}, %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x4a,0x80] + vfmsub132ps -512(%rdx){1to8}, %ymm28, %ymm25 + +// CHECK: vfmsub132ps -516(%rdx){1to8}, %ymm28, %ymm25 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x8a,0xfc,0xfd,0xff,0xff] + vfmsub132ps -516(%rdx){1to8}, %ymm28, %ymm25 + +// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0x9a,0xd4] + vfmsub132pd %xmm20, %xmm21, %xmm18 + +// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1} +// CHECK: encoding: [0x62,0xa2,0xd5,0x01,0x9a,0xd4] + vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1} + +// CHECK: vfmsub132pd 
%xmm20, %xmm21, %xmm18 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0xd5,0x81,0x9a,0xd4] + vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1} {z} + +// CHECK: vfmsub132pd (%rcx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x11] + vfmsub132pd (%rcx), %xmm21, %xmm18 + +// CHECK: vfmsub132pd 291(%rax,%r14,8), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0x9a,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmsub132pd 291(%rax,%r14,8), %xmm21, %xmm18 + +// CHECK: vfmsub132pd (%rcx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x11] + vfmsub132pd (%rcx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsub132pd 2032(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x52,0x7f] + vfmsub132pd 2032(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsub132pd 2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x92,0x00,0x08,0x00,0x00] + vfmsub132pd 2048(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsub132pd -2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x52,0x80] + vfmsub132pd -2048(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsub132pd -2064(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x92,0xf0,0xf7,0xff,0xff] + vfmsub132pd -2064(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsub132pd 1016(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x52,0x7f] + vfmsub132pd 1016(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsub132pd 1024(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x92,0x00,0x04,0x00,0x00] + vfmsub132pd 1024(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsub132pd -1024(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x52,0x80] + vfmsub132pd -1024(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsub132pd -1032(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x92,0xf8,0xfb,0xff,0xff] + vfmsub132pd -1032(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22 +// CHECK: encoding: 
[0x62,0xa2,0x9d,0x20,0x9a,0xf1] + vfmsub132pd %ymm17, %ymm28, %ymm22 + +// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} +// CHECK: encoding: [0x62,0xa2,0x9d,0x25,0x9a,0xf1] + vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} + +// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x9d,0xa5,0x9a,0xf1] + vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} {z} + +// CHECK: vfmsub132pd (%rcx), %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x31] + vfmsub132pd (%rcx), %ymm28, %ymm22 + +// CHECK: vfmsub132pd 291(%rax,%r14,8), %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x9d,0x20,0x9a,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsub132pd 291(%rax,%r14,8), %ymm28, %ymm22 + +// CHECK: vfmsub132pd (%rcx){1to4}, %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x31] + vfmsub132pd (%rcx){1to4}, %ymm28, %ymm22 + +// CHECK: vfmsub132pd 4064(%rdx), %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x72,0x7f] + vfmsub132pd 4064(%rdx), %ymm28, %ymm22 + +// CHECK: vfmsub132pd 4096(%rdx), %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0xb2,0x00,0x10,0x00,0x00] + vfmsub132pd 4096(%rdx), %ymm28, %ymm22 + +// CHECK: vfmsub132pd -4096(%rdx), %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x72,0x80] + vfmsub132pd -4096(%rdx), %ymm28, %ymm22 + +// CHECK: vfmsub132pd -4128(%rdx), %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0xb2,0xe0,0xef,0xff,0xff] + vfmsub132pd -4128(%rdx), %ymm28, %ymm22 + +// CHECK: vfmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x72,0x7f] + vfmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm22 + +// CHECK: vfmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0xb2,0x00,0x04,0x00,0x00] + vfmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm22 + +// CHECK: vfmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x72,0x80] + vfmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm22 + +// 
CHECK: vfmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0xb2,0xf8,0xfb,0xff,0xff] + vfmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm22 + +// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22 +// CHECK: encoding: [0x62,0x82,0x25,0x00,0xaa,0xf4] + vfmsub213ps %xmm28, %xmm27, %xmm22 + +// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} +// CHECK: encoding: [0x62,0x82,0x25,0x02,0xaa,0xf4] + vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} + +// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} {z} +// CHECK: encoding: [0x62,0x82,0x25,0x82,0xaa,0xf4] + vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} {z} + +// CHECK: vfmsub213ps (%rcx), %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0x31] + vfmsub213ps (%rcx), %xmm27, %xmm22 + +// CHECK: vfmsub213ps 291(%rax,%r14,8), %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x25,0x00,0xaa,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsub213ps 291(%rax,%r14,8), %xmm27, %xmm22 + +// CHECK: vfmsub213ps (%rcx){1to4}, %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0x31] + vfmsub213ps (%rcx){1to4}, %xmm27, %xmm22 + +// CHECK: vfmsub213ps 2032(%rdx), %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0x72,0x7f] + vfmsub213ps 2032(%rdx), %xmm27, %xmm22 + +// CHECK: vfmsub213ps 2048(%rdx), %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0xb2,0x00,0x08,0x00,0x00] + vfmsub213ps 2048(%rdx), %xmm27, %xmm22 + +// CHECK: vfmsub213ps -2048(%rdx), %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0x72,0x80] + vfmsub213ps -2048(%rdx), %xmm27, %xmm22 + +// CHECK: vfmsub213ps -2064(%rdx), %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0xb2,0xf0,0xf7,0xff,0xff] + vfmsub213ps -2064(%rdx), %xmm27, %xmm22 + +// CHECK: vfmsub213ps 508(%rdx){1to4}, %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0x72,0x7f] + vfmsub213ps 508(%rdx){1to4}, %xmm27, %xmm22 + +// CHECK: vfmsub213ps 512(%rdx){1to4}, %xmm27, %xmm22 +// CHECK: encoding: 
[0x62,0xe2,0x25,0x10,0xaa,0xb2,0x00,0x02,0x00,0x00] + vfmsub213ps 512(%rdx){1to4}, %xmm27, %xmm22 + +// CHECK: vfmsub213ps -512(%rdx){1to4}, %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0x72,0x80] + vfmsub213ps -512(%rdx){1to4}, %xmm27, %xmm22 + +// CHECK: vfmsub213ps -516(%rdx){1to4}, %xmm27, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0xb2,0xfc,0xfd,0xff,0xff] + vfmsub213ps -516(%rdx){1to4}, %xmm27, %xmm22 + +// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x22,0x15,0x20,0xaa,0xe6] + vfmsub213ps %ymm22, %ymm29, %ymm28 + +// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} +// CHECK: encoding: [0x62,0x22,0x15,0x21,0xaa,0xe6] + vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} + +// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} {z} +// CHECK: encoding: [0x62,0x22,0x15,0xa1,0xaa,0xe6] + vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} {z} + +// CHECK: vfmsub213ps (%rcx), %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0x21] + vfmsub213ps (%rcx), %ymm29, %ymm28 + +// CHECK: vfmsub213ps 291(%rax,%r14,8), %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x22,0x15,0x20,0xaa,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmsub213ps 291(%rax,%r14,8), %ymm29, %ymm28 + +// CHECK: vfmsub213ps (%rcx){1to8}, %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0x21] + vfmsub213ps (%rcx){1to8}, %ymm29, %ymm28 + +// CHECK: vfmsub213ps 4064(%rdx), %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0x62,0x7f] + vfmsub213ps 4064(%rdx), %ymm29, %ymm28 + +// CHECK: vfmsub213ps 4096(%rdx), %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0xa2,0x00,0x10,0x00,0x00] + vfmsub213ps 4096(%rdx), %ymm29, %ymm28 + +// CHECK: vfmsub213ps -4096(%rdx), %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0x62,0x80] + vfmsub213ps -4096(%rdx), %ymm29, %ymm28 + +// CHECK: vfmsub213ps -4128(%rdx), %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0xa2,0xe0,0xef,0xff,0xff] + vfmsub213ps -4128(%rdx), %ymm29, %ymm28 + +// 
CHECK: vfmsub213ps 508(%rdx){1to8}, %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0x62,0x7f] + vfmsub213ps 508(%rdx){1to8}, %ymm29, %ymm28 + +// CHECK: vfmsub213ps 512(%rdx){1to8}, %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0xa2,0x00,0x02,0x00,0x00] + vfmsub213ps 512(%rdx){1to8}, %ymm29, %ymm28 + +// CHECK: vfmsub213ps -512(%rdx){1to8}, %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0x62,0x80] + vfmsub213ps -512(%rdx){1to8}, %ymm29, %ymm28 + +// CHECK: vfmsub213ps -516(%rdx){1to8}, %ymm29, %ymm28 +// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0xa2,0xfc,0xfd,0xff,0xff] + vfmsub213ps -516(%rdx){1to8}, %ymm29, %ymm28 + +// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xa2,0xe5,0x00,0xaa,0xdc] + vfmsub213pd %xmm20, %xmm19, %xmm19 + +// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} +// CHECK: encoding: [0x62,0xa2,0xe5,0x01,0xaa,0xdc] + vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} + +// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0xe5,0x81,0xaa,0xdc] + vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} {z} + +// CHECK: vfmsub213pd (%rcx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x19] + vfmsub213pd (%rcx), %xmm19, %xmm19 + +// CHECK: vfmsub213pd 291(%rax,%r14,8), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xa2,0xe5,0x00,0xaa,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmsub213pd 291(%rax,%r14,8), %xmm19, %xmm19 + +// CHECK: vfmsub213pd (%rcx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x19] + vfmsub213pd (%rcx){1to2}, %xmm19, %xmm19 + +// CHECK: vfmsub213pd 2032(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x5a,0x7f] + vfmsub213pd 2032(%rdx), %xmm19, %xmm19 + +// CHECK: vfmsub213pd 2048(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x9a,0x00,0x08,0x00,0x00] + vfmsub213pd 2048(%rdx), %xmm19, %xmm19 + +// CHECK: vfmsub213pd -2048(%rdx), %xmm19, %xmm19 +// CHECK: encoding: 
[0x62,0xe2,0xe5,0x00,0xaa,0x5a,0x80] + vfmsub213pd -2048(%rdx), %xmm19, %xmm19 + +// CHECK: vfmsub213pd -2064(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x9a,0xf0,0xf7,0xff,0xff] + vfmsub213pd -2064(%rdx), %xmm19, %xmm19 + +// CHECK: vfmsub213pd 1016(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x5a,0x7f] + vfmsub213pd 1016(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vfmsub213pd 1024(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x9a,0x00,0x04,0x00,0x00] + vfmsub213pd 1024(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vfmsub213pd -1024(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x5a,0x80] + vfmsub213pd -1024(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vfmsub213pd -1032(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x9a,0xf8,0xfb,0xff,0xff] + vfmsub213pd -1032(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x02,0xb5,0x20,0xaa,0xdc] + vfmsub213pd %ymm28, %ymm25, %ymm27 + +// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} +// CHECK: encoding: [0x62,0x02,0xb5,0x24,0xaa,0xdc] + vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} + +// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0xb5,0xa4,0xaa,0xdc] + vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} {z} + +// CHECK: vfmsub213pd (%rcx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x19] + vfmsub213pd (%rcx), %ymm25, %ymm27 + +// CHECK: vfmsub213pd 291(%rax,%r14,8), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x22,0xb5,0x20,0xaa,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmsub213pd 291(%rax,%r14,8), %ymm25, %ymm27 + +// CHECK: vfmsub213pd (%rcx){1to4}, %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x19] + vfmsub213pd (%rcx){1to4}, %ymm25, %ymm27 + +// CHECK: vfmsub213pd 4064(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x5a,0x7f] + vfmsub213pd 4064(%rdx), %ymm25, 
%ymm27 + +// CHECK: vfmsub213pd 4096(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x9a,0x00,0x10,0x00,0x00] + vfmsub213pd 4096(%rdx), %ymm25, %ymm27 + +// CHECK: vfmsub213pd -4096(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x5a,0x80] + vfmsub213pd -4096(%rdx), %ymm25, %ymm27 + +// CHECK: vfmsub213pd -4128(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x9a,0xe0,0xef,0xff,0xff] + vfmsub213pd -4128(%rdx), %ymm25, %ymm27 + +// CHECK: vfmsub213pd 1016(%rdx){1to4}, %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x5a,0x7f] + vfmsub213pd 1016(%rdx){1to4}, %ymm25, %ymm27 + +// CHECK: vfmsub213pd 1024(%rdx){1to4}, %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x9a,0x00,0x04,0x00,0x00] + vfmsub213pd 1024(%rdx){1to4}, %ymm25, %ymm27 + +// CHECK: vfmsub213pd -1024(%rdx){1to4}, %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x5a,0x80] + vfmsub213pd -1024(%rdx){1to4}, %ymm25, %ymm27 + +// CHECK: vfmsub213pd -1032(%rdx){1to4}, %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x9a,0xf8,0xfb,0xff,0xff] + vfmsub213pd -1032(%rdx){1to4}, %ymm25, %ymm27 + +// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x02,0x65,0x00,0xba,0xe1] + vfmsub231ps %xmm25, %xmm19, %xmm28 + +// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} +// CHECK: encoding: [0x62,0x02,0x65,0x01,0xba,0xe1] + vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} + +// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} {z} +// CHECK: encoding: [0x62,0x02,0x65,0x81,0xba,0xe1] + vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} {z} + +// CHECK: vfmsub231ps (%rcx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0x21] + vfmsub231ps (%rcx), %xmm19, %xmm28 + +// CHECK: vfmsub231ps 291(%rax,%r14,8), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x22,0x65,0x00,0xba,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmsub231ps 291(%rax,%r14,8), %xmm19, %xmm28 + +// CHECK: vfmsub231ps (%rcx){1to4}, %xmm19, 
%xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0x21] + vfmsub231ps (%rcx){1to4}, %xmm19, %xmm28 + +// CHECK: vfmsub231ps 2032(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0x62,0x7f] + vfmsub231ps 2032(%rdx), %xmm19, %xmm28 + +// CHECK: vfmsub231ps 2048(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0xa2,0x00,0x08,0x00,0x00] + vfmsub231ps 2048(%rdx), %xmm19, %xmm28 + +// CHECK: vfmsub231ps -2048(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0x62,0x80] + vfmsub231ps -2048(%rdx), %xmm19, %xmm28 + +// CHECK: vfmsub231ps -2064(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0xa2,0xf0,0xf7,0xff,0xff] + vfmsub231ps -2064(%rdx), %xmm19, %xmm28 + +// CHECK: vfmsub231ps 508(%rdx){1to4}, %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0x62,0x7f] + vfmsub231ps 508(%rdx){1to4}, %xmm19, %xmm28 + +// CHECK: vfmsub231ps 512(%rdx){1to4}, %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0xa2,0x00,0x02,0x00,0x00] + vfmsub231ps 512(%rdx){1to4}, %xmm19, %xmm28 + +// CHECK: vfmsub231ps -512(%rdx){1to4}, %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0x62,0x80] + vfmsub231ps -512(%rdx){1to4}, %xmm19, %xmm28 + +// CHECK: vfmsub231ps -516(%rdx){1to4}, %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0xa2,0xfc,0xfd,0xff,0xff] + vfmsub231ps -516(%rdx){1to4}, %xmm19, %xmm28 + +// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0x82,0x2d,0x20,0xba,0xfa] + vfmsub231ps %ymm26, %ymm26, %ymm23 + +// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} +// CHECK: encoding: [0x62,0x82,0x2d,0x21,0xba,0xfa] + vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} + +// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0x2d,0xa1,0xba,0xfa] + vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} {z} + +// CHECK: vfmsub231ps (%rcx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0x39] + vfmsub231ps (%rcx), %ymm26, 
%ymm23 + +// CHECK: vfmsub231ps 291(%rax,%r14,8), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0xba,0xbc,0xf0,0x23,0x01,0x00,0x00] + vfmsub231ps 291(%rax,%r14,8), %ymm26, %ymm23 + +// CHECK: vfmsub231ps (%rcx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0x39] + vfmsub231ps (%rcx){1to8}, %ymm26, %ymm23 + +// CHECK: vfmsub231ps 4064(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0x7a,0x7f] + vfmsub231ps 4064(%rdx), %ymm26, %ymm23 + +// CHECK: vfmsub231ps 4096(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0xba,0x00,0x10,0x00,0x00] + vfmsub231ps 4096(%rdx), %ymm26, %ymm23 + +// CHECK: vfmsub231ps -4096(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0x7a,0x80] + vfmsub231ps -4096(%rdx), %ymm26, %ymm23 + +// CHECK: vfmsub231ps -4128(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0xba,0xe0,0xef,0xff,0xff] + vfmsub231ps -4128(%rdx), %ymm26, %ymm23 + +// CHECK: vfmsub231ps 508(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0x7a,0x7f] + vfmsub231ps 508(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vfmsub231ps 512(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0xba,0x00,0x02,0x00,0x00] + vfmsub231ps 512(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vfmsub231ps -512(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0x7a,0x80] + vfmsub231ps -512(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vfmsub231ps -516(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0xba,0xfc,0xfd,0xff,0xff] + vfmsub231ps -516(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x22,0xdd,0x00,0xba,0xe7] + vfmsub231pd %xmm23, %xmm20, %xmm28 + +// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} +// CHECK: encoding: [0x62,0x22,0xdd,0x04,0xba,0xe7] + vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} + +// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} 
{z} +// CHECK: encoding: [0x62,0x22,0xdd,0x84,0xba,0xe7] + vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} {z} + +// CHECK: vfmsub231pd (%rcx), %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0x21] + vfmsub231pd (%rcx), %xmm20, %xmm28 + +// CHECK: vfmsub231pd 291(%rax,%r14,8), %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x22,0xdd,0x00,0xba,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmsub231pd 291(%rax,%r14,8), %xmm20, %xmm28 + +// CHECK: vfmsub231pd (%rcx){1to2}, %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0x21] + vfmsub231pd (%rcx){1to2}, %xmm20, %xmm28 + +// CHECK: vfmsub231pd 2032(%rdx), %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0x62,0x7f] + vfmsub231pd 2032(%rdx), %xmm20, %xmm28 + +// CHECK: vfmsub231pd 2048(%rdx), %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0xa2,0x00,0x08,0x00,0x00] + vfmsub231pd 2048(%rdx), %xmm20, %xmm28 + +// CHECK: vfmsub231pd -2048(%rdx), %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0x62,0x80] + vfmsub231pd -2048(%rdx), %xmm20, %xmm28 + +// CHECK: vfmsub231pd -2064(%rdx), %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0xa2,0xf0,0xf7,0xff,0xff] + vfmsub231pd -2064(%rdx), %xmm20, %xmm28 + +// CHECK: vfmsub231pd 1016(%rdx){1to2}, %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0x62,0x7f] + vfmsub231pd 1016(%rdx){1to2}, %xmm20, %xmm28 + +// CHECK: vfmsub231pd 1024(%rdx){1to2}, %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0xa2,0x00,0x04,0x00,0x00] + vfmsub231pd 1024(%rdx){1to2}, %xmm20, %xmm28 + +// CHECK: vfmsub231pd -1024(%rdx){1to2}, %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0x62,0x80] + vfmsub231pd -1024(%rdx){1to2}, %xmm20, %xmm28 + +// CHECK: vfmsub231pd -1032(%rdx){1to2}, %xmm20, %xmm28 +// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0xa2,0xf8,0xfb,0xff,0xff] + vfmsub231pd -1032(%rdx){1to2}, %xmm20, %xmm28 + +// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xba,0xce] 
+ vfmsub231pd %ymm22, %ymm18, %ymm17 + +// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} +// CHECK: encoding: [0x62,0xa2,0xed,0x22,0xba,0xce] + vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} + +// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0xed,0xa2,0xba,0xce] + vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} {z} + +// CHECK: vfmsub231pd (%rcx), %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x09] + vfmsub231pd (%rcx), %ymm18, %ymm17 + +// CHECK: vfmsub231pd 291(%rax,%r14,8), %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xba,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmsub231pd 291(%rax,%r14,8), %ymm18, %ymm17 + +// CHECK: vfmsub231pd (%rcx){1to4}, %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x09] + vfmsub231pd (%rcx){1to4}, %ymm18, %ymm17 + +// CHECK: vfmsub231pd 4064(%rdx), %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x4a,0x7f] + vfmsub231pd 4064(%rdx), %ymm18, %ymm17 + +// CHECK: vfmsub231pd 4096(%rdx), %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x8a,0x00,0x10,0x00,0x00] + vfmsub231pd 4096(%rdx), %ymm18, %ymm17 + +// CHECK: vfmsub231pd -4096(%rdx), %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x4a,0x80] + vfmsub231pd -4096(%rdx), %ymm18, %ymm17 + +// CHECK: vfmsub231pd -4128(%rdx), %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x8a,0xe0,0xef,0xff,0xff] + vfmsub231pd -4128(%rdx), %ymm18, %ymm17 + +// CHECK: vfmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x4a,0x7f] + vfmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm17 + +// CHECK: vfmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x8a,0x00,0x04,0x00,0x00] + vfmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm17 + +// CHECK: vfmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x4a,0x80] + vfmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm17 + +// CHECK: vfmsub231pd 
-1032(%rdx){1to4}, %ymm18, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x8a,0xf8,0xfb,0xff,0xff] + vfmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm17 + +// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x22,0x25,0x00,0x96,0xc2] + vfmaddsub132ps %xmm18, %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} +// CHECK: encoding: [0x62,0x22,0x25,0x02,0x96,0xc2] + vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} + +// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x25,0x82,0x96,0xc2] + vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} {z} + +// CHECK: vfmaddsub132ps (%rcx), %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x01] + vfmaddsub132ps (%rcx), %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x22,0x25,0x00,0x96,0x84,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub132ps 291(%rax,%r14,8), %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps (%rcx){1to4}, %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x01] + vfmaddsub132ps (%rcx){1to4}, %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps 2032(%rdx), %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x42,0x7f] + vfmaddsub132ps 2032(%rdx), %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps 2048(%rdx), %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x82,0x00,0x08,0x00,0x00] + vfmaddsub132ps 2048(%rdx), %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps -2048(%rdx), %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x42,0x80] + vfmaddsub132ps -2048(%rdx), %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps -2064(%rdx), %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x82,0xf0,0xf7,0xff,0xff] + vfmaddsub132ps -2064(%rdx), %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps 508(%rdx){1to4}, %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x42,0x7f] + vfmaddsub132ps 508(%rdx){1to4}, %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps 512(%rdx){1to4}, %xmm27, %xmm24 
+// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x82,0x00,0x02,0x00,0x00] + vfmaddsub132ps 512(%rdx){1to4}, %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps -512(%rdx){1to4}, %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x42,0x80] + vfmaddsub132ps -512(%rdx){1to4}, %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps -516(%rdx){1to4}, %xmm27, %xmm24 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x82,0xfc,0xfd,0xff,0xff] + vfmaddsub132ps -516(%rdx){1to4}, %xmm27, %xmm24 + +// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21 +// CHECK: encoding: [0x62,0x82,0x5d,0x20,0x96,0xe8] + vfmaddsub132ps %ymm24, %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} +// CHECK: encoding: [0x62,0x82,0x5d,0x25,0x96,0xe8] + vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} + +// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} {z} +// CHECK: encoding: [0x62,0x82,0x5d,0xa5,0x96,0xe8] + vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} {z} + +// CHECK: vfmaddsub132ps (%rcx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0x29] + vfmaddsub132ps (%rcx), %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x5d,0x20,0x96,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub132ps 291(%rax,%r14,8), %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps (%rcx){1to8}, %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0x29] + vfmaddsub132ps (%rcx){1to8}, %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps 4064(%rdx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0x6a,0x7f] + vfmaddsub132ps 4064(%rdx), %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps 4096(%rdx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0xaa,0x00,0x10,0x00,0x00] + vfmaddsub132ps 4096(%rdx), %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps -4096(%rdx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0x6a,0x80] + vfmaddsub132ps -4096(%rdx), %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps -4128(%rdx), %ymm20, %ymm21 +// CHECK: encoding: 
[0x62,0xe2,0x5d,0x20,0x96,0xaa,0xe0,0xef,0xff,0xff] + vfmaddsub132ps -4128(%rdx), %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps 508(%rdx){1to8}, %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0x6a,0x7f] + vfmaddsub132ps 508(%rdx){1to8}, %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps 512(%rdx){1to8}, %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0xaa,0x00,0x02,0x00,0x00] + vfmaddsub132ps 512(%rdx){1to8}, %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps -512(%rdx){1to8}, %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0x6a,0x80] + vfmaddsub132ps -512(%rdx){1to8}, %ymm20, %ymm21 + +// CHECK: vfmaddsub132ps -516(%rdx){1to8}, %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0xaa,0xfc,0xfd,0xff,0xff] + vfmaddsub132ps -516(%rdx){1to8}, %ymm20, %ymm21 + +// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x96,0xd4] + vfmaddsub132pd %xmm20, %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} +// CHECK: encoding: [0x62,0x22,0xa5,0x06,0x96,0xd4] + vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} + +// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0xa5,0x86,0x96,0xd4] + vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} {z} + +// CHECK: vfmaddsub132pd (%rcx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x11] + vfmaddsub132pd (%rcx), %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x96,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub132pd 291(%rax,%r14,8), %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd (%rcx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x11] + vfmaddsub132pd (%rcx){1to2}, %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd 2032(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x52,0x7f] + vfmaddsub132pd 2032(%rdx), %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd 2048(%rdx), %xmm27, %xmm26 +// CHECK: encoding: 
[0x62,0x62,0xa5,0x00,0x96,0x92,0x00,0x08,0x00,0x00] + vfmaddsub132pd 2048(%rdx), %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd -2048(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x52,0x80] + vfmaddsub132pd -2048(%rdx), %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd -2064(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x92,0xf0,0xf7,0xff,0xff] + vfmaddsub132pd -2064(%rdx), %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd 1016(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x52,0x7f] + vfmaddsub132pd 1016(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd 1024(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x92,0x00,0x04,0x00,0x00] + vfmaddsub132pd 1024(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd -1024(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x52,0x80] + vfmaddsub132pd -1024(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd -1032(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x92,0xf8,0xfb,0xff,0xff] + vfmaddsub132pd -1032(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x02,0xed,0x20,0x96,0xf4] + vfmaddsub132pd %ymm28, %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} +// CHECK: encoding: [0x62,0x02,0xed,0x22,0x96,0xf4] + vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} + +// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} {z} +// CHECK: encoding: [0x62,0x02,0xed,0xa2,0x96,0xf4] + vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} {z} + +// CHECK: vfmaddsub132pd (%rcx), %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0x31] + vfmaddsub132pd (%rcx), %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x22,0xed,0x20,0x96,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub132pd 291(%rax,%r14,8), %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd (%rcx){1to4}, %ymm18, %ymm30 +// 
CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0x31] + vfmaddsub132pd (%rcx){1to4}, %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd 4064(%rdx), %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0x72,0x7f] + vfmaddsub132pd 4064(%rdx), %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd 4096(%rdx), %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0xb2,0x00,0x10,0x00,0x00] + vfmaddsub132pd 4096(%rdx), %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd -4096(%rdx), %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0x72,0x80] + vfmaddsub132pd -4096(%rdx), %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd -4128(%rdx), %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0xb2,0xe0,0xef,0xff,0xff] + vfmaddsub132pd -4128(%rdx), %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd 1016(%rdx){1to4}, %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0x72,0x7f] + vfmaddsub132pd 1016(%rdx){1to4}, %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd 1024(%rdx){1to4}, %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0xb2,0x00,0x04,0x00,0x00] + vfmaddsub132pd 1024(%rdx){1to4}, %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd -1024(%rdx){1to4}, %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0x72,0x80] + vfmaddsub132pd -1024(%rdx){1to4}, %ymm18, %ymm30 + +// CHECK: vfmaddsub132pd -1032(%rdx){1to4}, %ymm18, %ymm30 +// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0xb2,0xf8,0xfb,0xff,0xff] + vfmaddsub132pd -1032(%rdx){1to4}, %ymm18, %ymm30 + +// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xa6,0xe1] + vfmaddsub213ps %xmm17, %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} +// CHECK: encoding: [0x62,0xa2,0x15,0x01,0xa6,0xe1] + vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} + +// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x15,0x81,0xa6,0xe1] + vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} {z} + +// CHECK: vfmaddsub213ps (%rcx), %xmm29, %xmm20 +// CHECK: 
encoding: [0x62,0xe2,0x15,0x00,0xa6,0x21] + vfmaddsub213ps (%rcx), %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xa6,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub213ps 291(%rax,%r14,8), %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps (%rcx){1to4}, %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0x21] + vfmaddsub213ps (%rcx){1to4}, %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps 2032(%rdx), %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0x62,0x7f] + vfmaddsub213ps 2032(%rdx), %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps 2048(%rdx), %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0xa2,0x00,0x08,0x00,0x00] + vfmaddsub213ps 2048(%rdx), %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps -2048(%rdx), %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0x62,0x80] + vfmaddsub213ps -2048(%rdx), %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps -2064(%rdx), %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0xa2,0xf0,0xf7,0xff,0xff] + vfmaddsub213ps -2064(%rdx), %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps 508(%rdx){1to4}, %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0x62,0x7f] + vfmaddsub213ps 508(%rdx){1to4}, %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps 512(%rdx){1to4}, %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0xa2,0x00,0x02,0x00,0x00] + vfmaddsub213ps 512(%rdx){1to4}, %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps -512(%rdx){1to4}, %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0x62,0x80] + vfmaddsub213ps -512(%rdx){1to4}, %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps -516(%rdx){1to4}, %xmm29, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0xa2,0xfc,0xfd,0xff,0xff] + vfmaddsub213ps -516(%rdx){1to4}, %xmm29, %xmm20 + +// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x22,0x25,0x20,0xa6,0xcf] + vfmaddsub213ps %ymm23, %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} +// 
CHECK: encoding: [0x62,0x22,0x25,0x24,0xa6,0xcf] + vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} + +// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x25,0xa4,0xa6,0xcf] + vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} {z} + +// CHECK: vfmaddsub213ps (%rcx), %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x09] + vfmaddsub213ps (%rcx), %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x22,0x25,0x20,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub213ps 291(%rax,%r14,8), %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps (%rcx){1to8}, %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x09] + vfmaddsub213ps (%rcx){1to8}, %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps 4064(%rdx), %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x4a,0x7f] + vfmaddsub213ps 4064(%rdx), %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps 4096(%rdx), %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x8a,0x00,0x10,0x00,0x00] + vfmaddsub213ps 4096(%rdx), %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps -4096(%rdx), %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x4a,0x80] + vfmaddsub213ps -4096(%rdx), %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps -4128(%rdx), %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x8a,0xe0,0xef,0xff,0xff] + vfmaddsub213ps -4128(%rdx), %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps 508(%rdx){1to8}, %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x4a,0x7f] + vfmaddsub213ps 508(%rdx){1to8}, %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps 512(%rdx){1to8}, %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x8a,0x00,0x02,0x00,0x00] + vfmaddsub213ps 512(%rdx){1to8}, %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps -512(%rdx){1to8}, %ymm27, %ymm25 +// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x4a,0x80] + vfmaddsub213ps -512(%rdx){1to8}, %ymm27, %ymm25 + +// CHECK: vfmaddsub213ps -516(%rdx){1to8}, %ymm27, %ymm25 +// CHECK: 
encoding: [0x62,0x62,0x25,0x30,0xa6,0x8a,0xfc,0xfd,0xff,0xff] + vfmaddsub213ps -516(%rdx){1to8}, %ymm27, %ymm25 + +// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x22,0x95,0x00,0xa6,0xcb] + vfmaddsub213pd %xmm19, %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} +// CHECK: encoding: [0x62,0x22,0x95,0x07,0xa6,0xcb] + vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} + +// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x95,0x87,0xa6,0xcb] + vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} {z} + +// CHECK: vfmaddsub213pd (%rcx), %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x09] + vfmaddsub213pd (%rcx), %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x22,0x95,0x00,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub213pd 291(%rax,%r14,8), %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd (%rcx){1to2}, %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x09] + vfmaddsub213pd (%rcx){1to2}, %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd 2032(%rdx), %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x4a,0x7f] + vfmaddsub213pd 2032(%rdx), %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd 2048(%rdx), %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x8a,0x00,0x08,0x00,0x00] + vfmaddsub213pd 2048(%rdx), %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd -2048(%rdx), %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x4a,0x80] + vfmaddsub213pd -2048(%rdx), %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd -2064(%rdx), %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x8a,0xf0,0xf7,0xff,0xff] + vfmaddsub213pd -2064(%rdx), %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd 1016(%rdx){1to2}, %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x4a,0x7f] + vfmaddsub213pd 1016(%rdx){1to2}, %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd 1024(%rdx){1to2}, %xmm29, %xmm25 +// CHECK: encoding: 
[0x62,0x62,0x95,0x10,0xa6,0x8a,0x00,0x04,0x00,0x00] + vfmaddsub213pd 1024(%rdx){1to2}, %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd -1024(%rdx){1to2}, %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x4a,0x80] + vfmaddsub213pd -1024(%rdx){1to2}, %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd -1032(%rdx){1to2}, %xmm29, %xmm25 +// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x8a,0xf8,0xfb,0xff,0xff] + vfmaddsub213pd -1032(%rdx){1to2}, %xmm29, %xmm25 + +// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xa2,0xf5,0x20,0xa6,0xec] + vfmaddsub213pd %ymm20, %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} +// CHECK: encoding: [0x62,0xa2,0xf5,0x24,0xa6,0xec] + vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} + +// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0xf5,0xa4,0xa6,0xec] + vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} {z} + +// CHECK: vfmaddsub213pd (%rcx), %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x29] + vfmaddsub213pd (%rcx), %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xa2,0xf5,0x20,0xa6,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub213pd 291(%rax,%r14,8), %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd (%rcx){1to4}, %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x29] + vfmaddsub213pd (%rcx){1to4}, %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd 4064(%rdx), %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x6a,0x7f] + vfmaddsub213pd 4064(%rdx), %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd 4096(%rdx), %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0xaa,0x00,0x10,0x00,0x00] + vfmaddsub213pd 4096(%rdx), %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd -4096(%rdx), %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x6a,0x80] + vfmaddsub213pd -4096(%rdx), %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd -4128(%rdx), %ymm17, %ymm21 +// CHECK: encoding: 
[0x62,0xe2,0xf5,0x20,0xa6,0xaa,0xe0,0xef,0xff,0xff] + vfmaddsub213pd -4128(%rdx), %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd 1016(%rdx){1to4}, %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x6a,0x7f] + vfmaddsub213pd 1016(%rdx){1to4}, %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd 1024(%rdx){1to4}, %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0xaa,0x00,0x04,0x00,0x00] + vfmaddsub213pd 1024(%rdx){1to4}, %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd -1024(%rdx){1to4}, %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x6a,0x80] + vfmaddsub213pd -1024(%rdx){1to4}, %ymm17, %ymm21 + +// CHECK: vfmaddsub213pd -1032(%rdx){1to4}, %ymm17, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0xaa,0xf8,0xfb,0xff,0xff] + vfmaddsub213pd -1032(%rdx){1to4}, %ymm17, %ymm21 + +// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xb6,0xdc] + vfmaddsub231ps %xmm20, %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} +// CHECK: encoding: [0x62,0xa2,0x15,0x06,0xb6,0xdc] + vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} + +// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0x15,0x86,0xb6,0xdc] + vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} {z} + +// CHECK: vfmaddsub231ps (%rcx), %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x19] + vfmaddsub231ps (%rcx), %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xb6,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub231ps 291(%rax,%r14,8), %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps (%rcx){1to4}, %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x19] + vfmaddsub231ps (%rcx){1to4}, %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps 2032(%rdx), %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x5a,0x7f] + vfmaddsub231ps 2032(%rdx), %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps 2048(%rdx), %xmm29, %xmm19 +// CHECK: encoding: 
[0x62,0xe2,0x15,0x00,0xb6,0x9a,0x00,0x08,0x00,0x00] + vfmaddsub231ps 2048(%rdx), %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps -2048(%rdx), %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x5a,0x80] + vfmaddsub231ps -2048(%rdx), %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps -2064(%rdx), %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x9a,0xf0,0xf7,0xff,0xff] + vfmaddsub231ps -2064(%rdx), %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps 508(%rdx){1to4}, %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x5a,0x7f] + vfmaddsub231ps 508(%rdx){1to4}, %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps 512(%rdx){1to4}, %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x9a,0x00,0x02,0x00,0x00] + vfmaddsub231ps 512(%rdx){1to4}, %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps -512(%rdx){1to4}, %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x5a,0x80] + vfmaddsub231ps -512(%rdx){1to4}, %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps -516(%rdx){1to4}, %xmm29, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x9a,0xfc,0xfd,0xff,0xff] + vfmaddsub231ps -516(%rdx){1to4}, %xmm29, %xmm19 + +// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0xb6,0xd9] + vfmaddsub231ps %ymm17, %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} +// CHECK: encoding: [0x62,0xa2,0x3d,0x21,0xb6,0xd9] + vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} + +// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x3d,0xa1,0xb6,0xd9] + vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} {z} + +// CHECK: vfmaddsub231ps (%rcx), %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x19] + vfmaddsub231ps (%rcx), %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0xb6,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub231ps 291(%rax,%r14,8), %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps (%rcx){1to8}, %ymm24, %ymm19 +// CHECK: 
encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x19] + vfmaddsub231ps (%rcx){1to8}, %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps 4064(%rdx), %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x5a,0x7f] + vfmaddsub231ps 4064(%rdx), %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps 4096(%rdx), %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x9a,0x00,0x10,0x00,0x00] + vfmaddsub231ps 4096(%rdx), %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps -4096(%rdx), %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x5a,0x80] + vfmaddsub231ps -4096(%rdx), %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps -4128(%rdx), %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x9a,0xe0,0xef,0xff,0xff] + vfmaddsub231ps -4128(%rdx), %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps 508(%rdx){1to8}, %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x5a,0x7f] + vfmaddsub231ps 508(%rdx){1to8}, %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps 512(%rdx){1to8}, %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x9a,0x00,0x02,0x00,0x00] + vfmaddsub231ps 512(%rdx){1to8}, %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps -512(%rdx){1to8}, %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x5a,0x80] + vfmaddsub231ps -512(%rdx){1to8}, %ymm24, %ymm19 + +// CHECK: vfmaddsub231ps -516(%rdx){1to8}, %ymm24, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x9a,0xfc,0xfd,0xff,0xff] + vfmaddsub231ps -516(%rdx){1to8}, %ymm24, %ymm19 + +// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23 +// CHECK: encoding: [0x62,0x82,0xad,0x00,0xb6,0xfc] + vfmaddsub231pd %xmm28, %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} +// CHECK: encoding: [0x62,0x82,0xad,0x07,0xb6,0xfc] + vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} + +// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0xad,0x87,0xb6,0xfc] + vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} {z} + +// CHECK: vfmaddsub231pd (%rcx), %xmm26, %xmm23 +// CHECK: encoding: 
[0x62,0xe2,0xad,0x00,0xb6,0x39] + vfmaddsub231pd (%rcx), %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xa2,0xad,0x00,0xb6,0xbc,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub231pd 291(%rax,%r14,8), %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd (%rcx){1to2}, %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0x39] + vfmaddsub231pd (%rcx){1to2}, %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd 2032(%rdx), %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0x7a,0x7f] + vfmaddsub231pd 2032(%rdx), %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd 2048(%rdx), %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0xba,0x00,0x08,0x00,0x00] + vfmaddsub231pd 2048(%rdx), %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd -2048(%rdx), %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0x7a,0x80] + vfmaddsub231pd -2048(%rdx), %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd -2064(%rdx), %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0xba,0xf0,0xf7,0xff,0xff] + vfmaddsub231pd -2064(%rdx), %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd 1016(%rdx){1to2}, %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0x7a,0x7f] + vfmaddsub231pd 1016(%rdx){1to2}, %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd 1024(%rdx){1to2}, %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0xba,0x00,0x04,0x00,0x00] + vfmaddsub231pd 1024(%rdx){1to2}, %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd -1024(%rdx){1to2}, %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0x7a,0x80] + vfmaddsub231pd -1024(%rdx){1to2}, %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd -1032(%rdx){1to2}, %xmm26, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0xba,0xf8,0xfb,0xff,0xff] + vfmaddsub231pd -1032(%rdx){1to2}, %xmm26, %xmm23 + +// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x02,0xb5,0x20,0xb6,0xf3] + vfmaddsub231pd %ymm27, %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} +// 
CHECK: encoding: [0x62,0x02,0xb5,0x25,0xb6,0xf3] + vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} + +// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} {z} +// CHECK: encoding: [0x62,0x02,0xb5,0xa5,0xb6,0xf3] + vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} {z} + +// CHECK: vfmaddsub231pd (%rcx), %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0x31] + vfmaddsub231pd (%rcx), %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x22,0xb5,0x20,0xb6,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmaddsub231pd 291(%rax,%r14,8), %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd (%rcx){1to4}, %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0x31] + vfmaddsub231pd (%rcx){1to4}, %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd 4064(%rdx), %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0x72,0x7f] + vfmaddsub231pd 4064(%rdx), %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd 4096(%rdx), %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0xb2,0x00,0x10,0x00,0x00] + vfmaddsub231pd 4096(%rdx), %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd -4096(%rdx), %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0x72,0x80] + vfmaddsub231pd -4096(%rdx), %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd -4128(%rdx), %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0xb2,0xe0,0xef,0xff,0xff] + vfmaddsub231pd -4128(%rdx), %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd 1016(%rdx){1to4}, %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0x72,0x7f] + vfmaddsub231pd 1016(%rdx){1to4}, %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd 1024(%rdx){1to4}, %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0xb2,0x00,0x04,0x00,0x00] + vfmaddsub231pd 1024(%rdx){1to4}, %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd -1024(%rdx){1to4}, %ymm25, %ymm30 +// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0x72,0x80] + vfmaddsub231pd -1024(%rdx){1to4}, %ymm25, %ymm30 + +// CHECK: vfmaddsub231pd -1032(%rdx){1to4}, %ymm25, %ymm30 +// 
CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0xb2,0xf8,0xfb,0xff,0xff] + vfmaddsub231pd -1032(%rdx){1to4}, %ymm25, %ymm30 + +// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x22,0x45,0x00,0x97,0xc4] + vfmsubadd132ps %xmm20, %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} +// CHECK: encoding: [0x62,0x22,0x45,0x05,0x97,0xc4] + vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} + +// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0x45,0x85,0x97,0xc4] + vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} {z} + +// CHECK: vfmsubadd132ps (%rcx), %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x01] + vfmsubadd132ps (%rcx), %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x22,0x45,0x00,0x97,0x84,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd132ps 291(%rax,%r14,8), %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps (%rcx){1to4}, %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x01] + vfmsubadd132ps (%rcx){1to4}, %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps 2032(%rdx), %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x42,0x7f] + vfmsubadd132ps 2032(%rdx), %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps 2048(%rdx), %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x82,0x00,0x08,0x00,0x00] + vfmsubadd132ps 2048(%rdx), %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps -2048(%rdx), %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x42,0x80] + vfmsubadd132ps -2048(%rdx), %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps -2064(%rdx), %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x82,0xf0,0xf7,0xff,0xff] + vfmsubadd132ps -2064(%rdx), %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps 508(%rdx){1to4}, %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x42,0x7f] + vfmsubadd132ps 508(%rdx){1to4}, %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps 512(%rdx){1to4}, %xmm23, %xmm24 +// CHECK: encoding: 
[0x62,0x62,0x45,0x10,0x97,0x82,0x00,0x02,0x00,0x00] + vfmsubadd132ps 512(%rdx){1to4}, %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps -512(%rdx){1to4}, %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x42,0x80] + vfmsubadd132ps -512(%rdx){1to4}, %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps -516(%rdx){1to4}, %xmm23, %xmm24 +// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x82,0xfc,0xfd,0xff,0xff] + vfmsubadd132ps -516(%rdx){1to4}, %xmm23, %xmm24 + +// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x1d,0x20,0x97,0xff] + vfmsubadd132ps %ymm23, %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} +// CHECK: encoding: [0x62,0xa2,0x1d,0x21,0x97,0xff] + vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} + +// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x1d,0xa1,0x97,0xff] + vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} {z} + +// CHECK: vfmsubadd132ps (%rcx), %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0x39] + vfmsubadd132ps (%rcx), %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x1d,0x20,0x97,0xbc,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd132ps 291(%rax,%r14,8), %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps (%rcx){1to8}, %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0x39] + vfmsubadd132ps (%rcx){1to8}, %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps 4064(%rdx), %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0x7a,0x7f] + vfmsubadd132ps 4064(%rdx), %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps 4096(%rdx), %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0xba,0x00,0x10,0x00,0x00] + vfmsubadd132ps 4096(%rdx), %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps -4096(%rdx), %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0x7a,0x80] + vfmsubadd132ps -4096(%rdx), %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps -4128(%rdx), %ymm28, %ymm23 +// CHECK: encoding: 
[0x62,0xe2,0x1d,0x20,0x97,0xba,0xe0,0xef,0xff,0xff] + vfmsubadd132ps -4128(%rdx), %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps 508(%rdx){1to8}, %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0x7a,0x7f] + vfmsubadd132ps 508(%rdx){1to8}, %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps 512(%rdx){1to8}, %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0xba,0x00,0x02,0x00,0x00] + vfmsubadd132ps 512(%rdx){1to8}, %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps -512(%rdx){1to8}, %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0x7a,0x80] + vfmsubadd132ps -512(%rdx){1to8}, %ymm28, %ymm23 + +// CHECK: vfmsubadd132ps -516(%rdx){1to8}, %ymm28, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0xba,0xfc,0xfd,0xff,0xff] + vfmsubadd132ps -516(%rdx){1to8}, %ymm28, %ymm23 + +// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0x82,0xad,0x00,0x97,0xf0] + vfmsubadd132pd %xmm24, %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} +// CHECK: encoding: [0x62,0x82,0xad,0x03,0x97,0xf0] + vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} + +// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0xad,0x83,0x97,0xf0] + vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} {z} + +// CHECK: vfmsubadd132pd (%rcx), %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0x31] + vfmsubadd132pd (%rcx), %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xa2,0xad,0x00,0x97,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd132pd 291(%rax,%r14,8), %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd (%rcx){1to2}, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0x31] + vfmsubadd132pd (%rcx){1to2}, %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd 2032(%rdx), %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0x72,0x7f] + vfmsubadd132pd 2032(%rdx), %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd 2048(%rdx), %xmm26, %xmm22 +// CHECK: encoding: 
[0x62,0xe2,0xad,0x00,0x97,0xb2,0x00,0x08,0x00,0x00] + vfmsubadd132pd 2048(%rdx), %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd -2048(%rdx), %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0x72,0x80] + vfmsubadd132pd -2048(%rdx), %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd -2064(%rdx), %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0xb2,0xf0,0xf7,0xff,0xff] + vfmsubadd132pd -2064(%rdx), %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd 1016(%rdx){1to2}, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0x72,0x7f] + vfmsubadd132pd 1016(%rdx){1to2}, %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd 1024(%rdx){1to2}, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0xb2,0x00,0x04,0x00,0x00] + vfmsubadd132pd 1024(%rdx){1to2}, %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd -1024(%rdx){1to2}, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0x72,0x80] + vfmsubadd132pd -1024(%rdx){1to2}, %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd -1032(%rdx){1to2}, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0xb2,0xf8,0xfb,0xff,0xff] + vfmsubadd132pd -1032(%rdx){1to2}, %xmm26, %xmm22 + +// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x22,0xf5,0x20,0x97,0xc5] + vfmsubadd132pd %ymm21, %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} +// CHECK: encoding: [0x62,0x22,0xf5,0x27,0x97,0xc5] + vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} + +// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0xf5,0xa7,0x97,0xc5] + vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} {z} + +// CHECK: vfmsubadd132pd (%rcx), %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x01] + vfmsubadd132pd (%rcx), %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x22,0xf5,0x20,0x97,0x84,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd132pd 291(%rax,%r14,8), %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd (%rcx){1to4}, %ymm17, %ymm24 +// 
CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x01] + vfmsubadd132pd (%rcx){1to4}, %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd 4064(%rdx), %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x42,0x7f] + vfmsubadd132pd 4064(%rdx), %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd 4096(%rdx), %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x82,0x00,0x10,0x00,0x00] + vfmsubadd132pd 4096(%rdx), %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd -4096(%rdx), %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x42,0x80] + vfmsubadd132pd -4096(%rdx), %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd -4128(%rdx), %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x82,0xe0,0xef,0xff,0xff] + vfmsubadd132pd -4128(%rdx), %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd 1016(%rdx){1to4}, %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x42,0x7f] + vfmsubadd132pd 1016(%rdx){1to4}, %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd 1024(%rdx){1to4}, %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x82,0x00,0x04,0x00,0x00] + vfmsubadd132pd 1024(%rdx){1to4}, %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd -1024(%rdx){1to4}, %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x42,0x80] + vfmsubadd132pd -1024(%rdx){1to4}, %ymm17, %ymm24 + +// CHECK: vfmsubadd132pd -1032(%rdx){1to4}, %ymm17, %ymm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x82,0xf8,0xfb,0xff,0xff] + vfmsubadd132pd -1032(%rdx){1to4}, %ymm17, %ymm24 + +// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x5d,0x00,0xa7,0xd9] + vfmsubadd213ps %xmm17, %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} +// CHECK: encoding: [0x62,0xa2,0x5d,0x05,0xa7,0xd9] + vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} + +// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x5d,0x85,0xa7,0xd9] + vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} {z} + +// CHECK: vfmsubadd213ps (%rcx), %xmm20, %xmm19 +// CHECK: 
encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x19] + vfmsubadd213ps (%rcx), %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x5d,0x00,0xa7,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd213ps 291(%rax,%r14,8), %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps (%rcx){1to4}, %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x19] + vfmsubadd213ps (%rcx){1to4}, %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps 2032(%rdx), %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x5a,0x7f] + vfmsubadd213ps 2032(%rdx), %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps 2048(%rdx), %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x9a,0x00,0x08,0x00,0x00] + vfmsubadd213ps 2048(%rdx), %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps -2048(%rdx), %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x5a,0x80] + vfmsubadd213ps -2048(%rdx), %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps -2064(%rdx), %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x9a,0xf0,0xf7,0xff,0xff] + vfmsubadd213ps -2064(%rdx), %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps 508(%rdx){1to4}, %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x5a,0x7f] + vfmsubadd213ps 508(%rdx){1to4}, %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps 512(%rdx){1to4}, %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x9a,0x00,0x02,0x00,0x00] + vfmsubadd213ps 512(%rdx){1to4}, %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps -512(%rdx){1to4}, %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x5a,0x80] + vfmsubadd213ps -512(%rdx){1to4}, %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps -516(%rdx){1to4}, %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x9a,0xfc,0xfd,0xff,0xff] + vfmsubadd213ps -516(%rdx){1to4}, %xmm20, %xmm19 + +// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x22,0x75,0x20,0xa7,0xd7] + vfmsubadd213ps %ymm23, %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} +// 
CHECK: encoding: [0x62,0x22,0x75,0x27,0xa7,0xd7] + vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} + +// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x75,0xa7,0xa7,0xd7] + vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} {z} + +// CHECK: vfmsubadd213ps (%rcx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x11] + vfmsubadd213ps (%rcx), %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x22,0x75,0x20,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd213ps 291(%rax,%r14,8), %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps (%rcx){1to8}, %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x11] + vfmsubadd213ps (%rcx){1to8}, %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps 4064(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x52,0x7f] + vfmsubadd213ps 4064(%rdx), %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps 4096(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x92,0x00,0x10,0x00,0x00] + vfmsubadd213ps 4096(%rdx), %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps -4096(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x52,0x80] + vfmsubadd213ps -4096(%rdx), %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps -4128(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x92,0xe0,0xef,0xff,0xff] + vfmsubadd213ps -4128(%rdx), %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps 508(%rdx){1to8}, %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x52,0x7f] + vfmsubadd213ps 508(%rdx){1to8}, %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps 512(%rdx){1to8}, %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x92,0x00,0x02,0x00,0x00] + vfmsubadd213ps 512(%rdx){1to8}, %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps -512(%rdx){1to8}, %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x52,0x80] + vfmsubadd213ps -512(%rdx){1to8}, %ymm17, %ymm26 + +// CHECK: vfmsubadd213ps -516(%rdx){1to8}, %ymm17, %ymm26 +// CHECK: 
encoding: [0x62,0x62,0x75,0x30,0xa7,0x92,0xfc,0xfd,0xff,0xff] + vfmsubadd213ps -516(%rdx){1to8}, %ymm17, %ymm26 + +// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0x82,0xd5,0x00,0xa7,0xd4] + vfmsubadd213pd %xmm28, %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} +// CHECK: encoding: [0x62,0x82,0xd5,0x04,0xa7,0xd4] + vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} + +// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0xd5,0x84,0xa7,0xd4] + vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} {z} + +// CHECK: vfmsubadd213pd (%rcx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x11] + vfmsubadd213pd (%rcx), %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd213pd 291(%rax,%r14,8), %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd (%rcx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x11] + vfmsubadd213pd (%rcx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd 2032(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x52,0x7f] + vfmsubadd213pd 2032(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd 2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x92,0x00,0x08,0x00,0x00] + vfmsubadd213pd 2048(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd -2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x52,0x80] + vfmsubadd213pd -2048(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd -2064(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x92,0xf0,0xf7,0xff,0xff] + vfmsubadd213pd -2064(%rdx), %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd 1016(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x52,0x7f] + vfmsubadd213pd 1016(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd 1024(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: 
[0x62,0xe2,0xd5,0x10,0xa7,0x92,0x00,0x04,0x00,0x00] + vfmsubadd213pd 1024(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd -1024(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x52,0x80] + vfmsubadd213pd -1024(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd -1032(%rdx){1to2}, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x92,0xf8,0xfb,0xff,0xff] + vfmsubadd213pd -1032(%rdx){1to2}, %xmm21, %xmm18 + +// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x02,0xdd,0x20,0xa7,0xd9] + vfmsubadd213pd %ymm25, %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} +// CHECK: encoding: [0x62,0x02,0xdd,0x27,0xa7,0xd9] + vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} + +// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0xdd,0xa7,0xa7,0xd9] + vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} {z} + +// CHECK: vfmsubadd213pd (%rcx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x19] + vfmsubadd213pd (%rcx), %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x22,0xdd,0x20,0xa7,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd213pd 291(%rax,%r14,8), %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd (%rcx){1to4}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x19] + vfmsubadd213pd (%rcx){1to4}, %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd 4064(%rdx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x5a,0x7f] + vfmsubadd213pd 4064(%rdx), %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd 4096(%rdx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x9a,0x00,0x10,0x00,0x00] + vfmsubadd213pd 4096(%rdx), %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd -4096(%rdx), %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x5a,0x80] + vfmsubadd213pd -4096(%rdx), %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd -4128(%rdx), %ymm20, %ymm27 +// CHECK: encoding: 
[0x62,0x62,0xdd,0x20,0xa7,0x9a,0xe0,0xef,0xff,0xff] + vfmsubadd213pd -4128(%rdx), %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd 1016(%rdx){1to4}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x5a,0x7f] + vfmsubadd213pd 1016(%rdx){1to4}, %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd 1024(%rdx){1to4}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x9a,0x00,0x04,0x00,0x00] + vfmsubadd213pd 1024(%rdx){1to4}, %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd -1024(%rdx){1to4}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x5a,0x80] + vfmsubadd213pd -1024(%rdx){1to4}, %ymm20, %ymm27 + +// CHECK: vfmsubadd213pd -1032(%rdx){1to4}, %ymm20, %ymm27 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x9a,0xf8,0xfb,0xff,0xff] + vfmsubadd213pd -1032(%rdx){1to4}, %ymm20, %ymm27 + +// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x35,0x00,0xb7,0xef] + vfmsubadd231ps %xmm23, %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} +// CHECK: encoding: [0x62,0xa2,0x35,0x04,0xb7,0xef] + vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} + +// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x35,0x84,0xb7,0xef] + vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} {z} + +// CHECK: vfmsubadd231ps (%rcx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0x29] + vfmsubadd231ps (%rcx), %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x35,0x00,0xb7,0xac,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd231ps 291(%rax,%r14,8), %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps (%rcx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0x29] + vfmsubadd231ps (%rcx){1to4}, %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps 2032(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0x6a,0x7f] + vfmsubadd231ps 2032(%rdx), %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps 2048(%rdx), %xmm25, %xmm21 +// CHECK: encoding: 
[0x62,0xe2,0x35,0x00,0xb7,0xaa,0x00,0x08,0x00,0x00] + vfmsubadd231ps 2048(%rdx), %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps -2048(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0x6a,0x80] + vfmsubadd231ps -2048(%rdx), %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps -2064(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0xaa,0xf0,0xf7,0xff,0xff] + vfmsubadd231ps -2064(%rdx), %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps 508(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0x6a,0x7f] + vfmsubadd231ps 508(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps 512(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0xaa,0x00,0x02,0x00,0x00] + vfmsubadd231ps 512(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps -512(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0x6a,0x80] + vfmsubadd231ps -512(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps -516(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0xaa,0xfc,0xfd,0xff,0xff] + vfmsubadd231ps -516(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x22,0x45,0x20,0xb7,0xdc] + vfmsubadd231ps %ymm20, %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} +// CHECK: encoding: [0x62,0x22,0x45,0x23,0xb7,0xdc] + vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} + +// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} {z} +// CHECK: encoding: [0x62,0x22,0x45,0xa3,0xb7,0xdc] + vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} {z} + +// CHECK: vfmsubadd231ps (%rcx), %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x19] + vfmsubadd231ps (%rcx), %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x22,0x45,0x20,0xb7,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd231ps 291(%rax,%r14,8), %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps (%rcx){1to8}, %ymm23, %ymm27 +// CHECK: 
encoding: [0x62,0x62,0x45,0x30,0xb7,0x19] + vfmsubadd231ps (%rcx){1to8}, %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps 4064(%rdx), %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x5a,0x7f] + vfmsubadd231ps 4064(%rdx), %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps 4096(%rdx), %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x9a,0x00,0x10,0x00,0x00] + vfmsubadd231ps 4096(%rdx), %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps -4096(%rdx), %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x5a,0x80] + vfmsubadd231ps -4096(%rdx), %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps -4128(%rdx), %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x9a,0xe0,0xef,0xff,0xff] + vfmsubadd231ps -4128(%rdx), %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps 508(%rdx){1to8}, %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x5a,0x7f] + vfmsubadd231ps 508(%rdx){1to8}, %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps 512(%rdx){1to8}, %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x9a,0x00,0x02,0x00,0x00] + vfmsubadd231ps 512(%rdx){1to8}, %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps -512(%rdx){1to8}, %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x5a,0x80] + vfmsubadd231ps -512(%rdx){1to8}, %ymm23, %ymm27 + +// CHECK: vfmsubadd231ps -516(%rdx){1to8}, %ymm23, %ymm27 +// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x9a,0xfc,0xfd,0xff,0xff] + vfmsubadd231ps -516(%rdx){1to8}, %ymm23, %ymm27 + +// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20 +// CHECK: encoding: [0x62,0x82,0xbd,0x00,0xb7,0xe4] + vfmsubadd231pd %xmm28, %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} +// CHECK: encoding: [0x62,0x82,0xbd,0x03,0xb7,0xe4] + vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} + +// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0xbd,0x83,0xb7,0xe4] + vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} {z} + +// CHECK: vfmsubadd231pd (%rcx), %xmm24, %xmm20 +// CHECK: encoding: 
[0x62,0xe2,0xbd,0x00,0xb7,0x21] + vfmsubadd231pd (%rcx), %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xa2,0xbd,0x00,0xb7,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd231pd 291(%rax,%r14,8), %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd (%rcx){1to2}, %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x21] + vfmsubadd231pd (%rcx){1to2}, %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd 2032(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x62,0x7f] + vfmsubadd231pd 2032(%rdx), %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd 2048(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0xa2,0x00,0x08,0x00,0x00] + vfmsubadd231pd 2048(%rdx), %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd -2048(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x62,0x80] + vfmsubadd231pd -2048(%rdx), %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd -2064(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0xa2,0xf0,0xf7,0xff,0xff] + vfmsubadd231pd -2064(%rdx), %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd 1016(%rdx){1to2}, %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x62,0x7f] + vfmsubadd231pd 1016(%rdx){1to2}, %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd 1024(%rdx){1to2}, %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0xa2,0x00,0x04,0x00,0x00] + vfmsubadd231pd 1024(%rdx){1to2}, %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd -1024(%rdx){1to2}, %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x62,0x80] + vfmsubadd231pd -1024(%rdx){1to2}, %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd -1032(%rdx){1to2}, %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0xa2,0xf8,0xfb,0xff,0xff] + vfmsubadd231pd -1032(%rdx){1to2}, %xmm24, %xmm20 + +// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x22,0x95,0x20,0xb7,0xf5] + vfmsubadd231pd %ymm21, %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} +// 
CHECK: encoding: [0x62,0x22,0x95,0x27,0xb7,0xf5] + vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} + +// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x95,0xa7,0xb7,0xf5] + vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} {z} + +// CHECK: vfmsubadd231pd (%rcx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0x31] + vfmsubadd231pd (%rcx), %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x22,0x95,0x20,0xb7,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfmsubadd231pd 291(%rax,%r14,8), %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd (%rcx){1to4}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0x31] + vfmsubadd231pd (%rcx){1to4}, %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd 4064(%rdx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0x72,0x7f] + vfmsubadd231pd 4064(%rdx), %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd 4096(%rdx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0xb2,0x00,0x10,0x00,0x00] + vfmsubadd231pd 4096(%rdx), %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd -4096(%rdx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0x72,0x80] + vfmsubadd231pd -4096(%rdx), %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd -4128(%rdx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0xb2,0xe0,0xef,0xff,0xff] + vfmsubadd231pd -4128(%rdx), %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd 1016(%rdx){1to4}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0x72,0x7f] + vfmsubadd231pd 1016(%rdx){1to4}, %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd 1024(%rdx){1to4}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0xb2,0x00,0x04,0x00,0x00] + vfmsubadd231pd 1024(%rdx){1to4}, %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd -1024(%rdx){1to4}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0x72,0x80] + vfmsubadd231pd -1024(%rdx){1to4}, %ymm29, %ymm30 + +// CHECK: vfmsubadd231pd -1032(%rdx){1to4}, %ymm29, %ymm30 +// 
CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0xb2,0xf8,0xfb,0xff,0xff] + vfmsubadd231pd -1032(%rdx){1to4}, %ymm29, %ymm30 + +// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x2d,0x00,0x9c,0xe2] + vfnmadd132ps %xmm18, %xmm26, %xmm20 + +// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} +// CHECK: encoding: [0x62,0xa2,0x2d,0x07,0x9c,0xe2] + vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} + +// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x2d,0x87,0x9c,0xe2] + vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} {z} + +// CHECK: vfnmadd132ps (%rcx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x21] + vfnmadd132ps (%rcx), %xmm26, %xmm20 + +// CHECK: vfnmadd132ps 291(%rax,%r14,8), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x2d,0x00,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd132ps 291(%rax,%r14,8), %xmm26, %xmm20 + +// CHECK: vfnmadd132ps (%rcx){1to4}, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x21] + vfnmadd132ps (%rcx){1to4}, %xmm26, %xmm20 + +// CHECK: vfnmadd132ps 2032(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x62,0x7f] + vfnmadd132ps 2032(%rdx), %xmm26, %xmm20 + +// CHECK: vfnmadd132ps 2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0xa2,0x00,0x08,0x00,0x00] + vfnmadd132ps 2048(%rdx), %xmm26, %xmm20 + +// CHECK: vfnmadd132ps -2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x62,0x80] + vfnmadd132ps -2048(%rdx), %xmm26, %xmm20 + +// CHECK: vfnmadd132ps -2064(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0xa2,0xf0,0xf7,0xff,0xff] + vfnmadd132ps -2064(%rdx), %xmm26, %xmm20 + +// CHECK: vfnmadd132ps 508(%rdx){1to4}, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x62,0x7f] + vfnmadd132ps 508(%rdx){1to4}, %xmm26, %xmm20 + +// CHECK: vfnmadd132ps 512(%rdx){1to4}, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0xa2,0x00,0x02,0x00,0x00] + 
vfnmadd132ps 512(%rdx){1to4}, %xmm26, %xmm20 + +// CHECK: vfnmadd132ps -512(%rdx){1to4}, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x62,0x80] + vfnmadd132ps -512(%rdx){1to4}, %xmm26, %xmm20 + +// CHECK: vfnmadd132ps -516(%rdx){1to4}, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0xa2,0xfc,0xfd,0xff,0xff] + vfnmadd132ps -516(%rdx){1to4}, %xmm26, %xmm20 + +// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xa2,0x55,0x20,0x9c,0xe2] + vfnmadd132ps %ymm18, %ymm21, %ymm20 + +// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} +// CHECK: encoding: [0x62,0xa2,0x55,0x27,0x9c,0xe2] + vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} + +// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x55,0xa7,0x9c,0xe2] + vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} {z} + +// CHECK: vfnmadd132ps (%rcx), %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0x21] + vfnmadd132ps (%rcx), %ymm21, %ymm20 + +// CHECK: vfnmadd132ps 291(%rax,%r14,8), %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xa2,0x55,0x20,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd132ps 291(%rax,%r14,8), %ymm21, %ymm20 + +// CHECK: vfnmadd132ps (%rcx){1to8}, %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0x21] + vfnmadd132ps (%rcx){1to8}, %ymm21, %ymm20 + +// CHECK: vfnmadd132ps 4064(%rdx), %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0x62,0x7f] + vfnmadd132ps 4064(%rdx), %ymm21, %ymm20 + +// CHECK: vfnmadd132ps 4096(%rdx), %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0xa2,0x00,0x10,0x00,0x00] + vfnmadd132ps 4096(%rdx), %ymm21, %ymm20 + +// CHECK: vfnmadd132ps -4096(%rdx), %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0x62,0x80] + vfnmadd132ps -4096(%rdx), %ymm21, %ymm20 + +// CHECK: vfnmadd132ps -4128(%rdx), %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0xa2,0xe0,0xef,0xff,0xff] + vfnmadd132ps -4128(%rdx), %ymm21, %ymm20 + +// CHECK: vfnmadd132ps 
508(%rdx){1to8}, %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0x62,0x7f] + vfnmadd132ps 508(%rdx){1to8}, %ymm21, %ymm20 + +// CHECK: vfnmadd132ps 512(%rdx){1to8}, %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0xa2,0x00,0x02,0x00,0x00] + vfnmadd132ps 512(%rdx){1to8}, %ymm21, %ymm20 + +// CHECK: vfnmadd132ps -512(%rdx){1to8}, %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0x62,0x80] + vfnmadd132ps -512(%rdx){1to8}, %ymm21, %ymm20 + +// CHECK: vfnmadd132ps -516(%rdx){1to8}, %ymm21, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0xa2,0xfc,0xfd,0xff,0xff] + vfnmadd132ps -516(%rdx){1to8}, %ymm21, %ymm20 + +// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x22,0xd5,0x00,0x9c,0xd2] + vfnmadd132pd %xmm18, %xmm21, %xmm26 + +// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} +// CHECK: encoding: [0x62,0x22,0xd5,0x06,0x9c,0xd2] + vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} + +// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0xd5,0x86,0x9c,0xd2] + vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} {z} + +// CHECK: vfnmadd132pd (%rcx), %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x11] + vfnmadd132pd (%rcx), %xmm21, %xmm26 + +// CHECK: vfnmadd132pd 291(%rax,%r14,8), %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x22,0xd5,0x00,0x9c,0x94,0xf0,0x23,0x01,0x00,0x00] + vfnmadd132pd 291(%rax,%r14,8), %xmm21, %xmm26 + +// CHECK: vfnmadd132pd (%rcx){1to2}, %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x11] + vfnmadd132pd (%rcx){1to2}, %xmm21, %xmm26 + +// CHECK: vfnmadd132pd 2032(%rdx), %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x52,0x7f] + vfnmadd132pd 2032(%rdx), %xmm21, %xmm26 + +// CHECK: vfnmadd132pd 2048(%rdx), %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x92,0x00,0x08,0x00,0x00] + vfnmadd132pd 2048(%rdx), %xmm21, %xmm26 + +// CHECK: vfnmadd132pd -2048(%rdx), %xmm21, %xmm26 +// CHECK: encoding: 
[0x62,0x62,0xd5,0x00,0x9c,0x52,0x80] + vfnmadd132pd -2048(%rdx), %xmm21, %xmm26 + +// CHECK: vfnmadd132pd -2064(%rdx), %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x92,0xf0,0xf7,0xff,0xff] + vfnmadd132pd -2064(%rdx), %xmm21, %xmm26 + +// CHECK: vfnmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x52,0x7f] + vfnmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vfnmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x92,0x00,0x04,0x00,0x00] + vfnmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vfnmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x52,0x80] + vfnmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vfnmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm26 +// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x92,0xf8,0xfb,0xff,0xff] + vfnmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x22,0xcd,0x20,0x9c,0xca] + vfnmadd132pd %ymm18, %ymm22, %ymm25 + +// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} +// CHECK: encoding: [0x62,0x22,0xcd,0x24,0x9c,0xca] + vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} + +// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0xcd,0xa4,0x9c,0xca] + vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} {z} + +// CHECK: vfnmadd132pd (%rcx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x09] + vfnmadd132pd (%rcx), %ymm22, %ymm25 + +// CHECK: vfnmadd132pd 291(%rax,%r14,8), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x22,0xcd,0x20,0x9c,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfnmadd132pd 291(%rax,%r14,8), %ymm22, %ymm25 + +// CHECK: vfnmadd132pd (%rcx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x09] + vfnmadd132pd (%rcx){1to4}, %ymm22, %ymm25 + +// CHECK: vfnmadd132pd 4064(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x4a,0x7f] + vfnmadd132pd 
4064(%rdx), %ymm22, %ymm25 + +// CHECK: vfnmadd132pd 4096(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x8a,0x00,0x10,0x00,0x00] + vfnmadd132pd 4096(%rdx), %ymm22, %ymm25 + +// CHECK: vfnmadd132pd -4096(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x4a,0x80] + vfnmadd132pd -4096(%rdx), %ymm22, %ymm25 + +// CHECK: vfnmadd132pd -4128(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x8a,0xe0,0xef,0xff,0xff] + vfnmadd132pd -4128(%rdx), %ymm22, %ymm25 + +// CHECK: vfnmadd132pd 1016(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x4a,0x7f] + vfnmadd132pd 1016(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vfnmadd132pd 1024(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x8a,0x00,0x04,0x00,0x00] + vfnmadd132pd 1024(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vfnmadd132pd -1024(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x4a,0x80] + vfnmadd132pd -1024(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vfnmadd132pd -1032(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x8a,0xf8,0xfb,0xff,0xff] + vfnmadd132pd -1032(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x22,0x3d,0x00,0xac,0xc4] + vfnmadd213ps %xmm20, %xmm24, %xmm24 + +// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} +// CHECK: encoding: [0x62,0x22,0x3d,0x04,0xac,0xc4] + vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} + +// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x3d,0x84,0xac,0xc4] + vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} {z} + +// CHECK: vfnmadd213ps (%rcx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x01] + vfnmadd213ps (%rcx), %xmm24, %xmm24 + +// CHECK: vfnmadd213ps 291(%rax,%r14,8), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x22,0x3d,0x00,0xac,0x84,0xf0,0x23,0x01,0x00,0x00] + vfnmadd213ps 291(%rax,%r14,8), %xmm24, %xmm24 + +// 
CHECK: vfnmadd213ps (%rcx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x01] + vfnmadd213ps (%rcx){1to4}, %xmm24, %xmm24 + +// CHECK: vfnmadd213ps 2032(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x42,0x7f] + vfnmadd213ps 2032(%rdx), %xmm24, %xmm24 + +// CHECK: vfnmadd213ps 2048(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x82,0x00,0x08,0x00,0x00] + vfnmadd213ps 2048(%rdx), %xmm24, %xmm24 + +// CHECK: vfnmadd213ps -2048(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x42,0x80] + vfnmadd213ps -2048(%rdx), %xmm24, %xmm24 + +// CHECK: vfnmadd213ps -2064(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x82,0xf0,0xf7,0xff,0xff] + vfnmadd213ps -2064(%rdx), %xmm24, %xmm24 + +// CHECK: vfnmadd213ps 508(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x42,0x7f] + vfnmadd213ps 508(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vfnmadd213ps 512(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x82,0x00,0x02,0x00,0x00] + vfnmadd213ps 512(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vfnmadd213ps -512(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x42,0x80] + vfnmadd213ps -512(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vfnmadd213ps -516(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x82,0xfc,0xfd,0xff,0xff] + vfnmadd213ps -516(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x65,0x20,0xac,0xee] + vfnmadd213ps %ymm22, %ymm19, %ymm21 + +// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x65,0x22,0xac,0xee] + vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} + +// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x65,0xa2,0xac,0xee] + vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} {z} + +// CHECK: vfnmadd213ps (%rcx), %ymm19, %ymm21 +// CHECK: encoding: 
[0x62,0xe2,0x65,0x20,0xac,0x29] + vfnmadd213ps (%rcx), %ymm19, %ymm21 + +// CHECK: vfnmadd213ps 291(%rax,%r14,8), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x65,0x20,0xac,0xac,0xf0,0x23,0x01,0x00,0x00] + vfnmadd213ps 291(%rax,%r14,8), %ymm19, %ymm21 + +// CHECK: vfnmadd213ps (%rcx){1to8}, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0x29] + vfnmadd213ps (%rcx){1to8}, %ymm19, %ymm21 + +// CHECK: vfnmadd213ps 4064(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0x6a,0x7f] + vfnmadd213ps 4064(%rdx), %ymm19, %ymm21 + +// CHECK: vfnmadd213ps 4096(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0xaa,0x00,0x10,0x00,0x00] + vfnmadd213ps 4096(%rdx), %ymm19, %ymm21 + +// CHECK: vfnmadd213ps -4096(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0x6a,0x80] + vfnmadd213ps -4096(%rdx), %ymm19, %ymm21 + +// CHECK: vfnmadd213ps -4128(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0xaa,0xe0,0xef,0xff,0xff] + vfnmadd213ps -4128(%rdx), %ymm19, %ymm21 + +// CHECK: vfnmadd213ps 508(%rdx){1to8}, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0x6a,0x7f] + vfnmadd213ps 508(%rdx){1to8}, %ymm19, %ymm21 + +// CHECK: vfnmadd213ps 512(%rdx){1to8}, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0xaa,0x00,0x02,0x00,0x00] + vfnmadd213ps 512(%rdx){1to8}, %ymm19, %ymm21 + +// CHECK: vfnmadd213ps -512(%rdx){1to8}, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0x6a,0x80] + vfnmadd213ps -512(%rdx){1to8}, %ymm19, %ymm21 + +// CHECK: vfnmadd213ps -516(%rdx){1to8}, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0xaa,0xfc,0xfd,0xff,0xff] + vfnmadd213ps -516(%rdx){1to8}, %ymm19, %ymm21 + +// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x02,0xb5,0x00,0xac,0xc0] + vfnmadd213pd %xmm24, %xmm25, %xmm24 + +// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} +// CHECK: encoding: [0x62,0x02,0xb5,0x04,0xac,0xc0] + 
vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} + +// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0xb5,0x84,0xac,0xc0] + vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} {z} + +// CHECK: vfnmadd213pd (%rcx), %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x01] + vfnmadd213pd (%rcx), %xmm25, %xmm24 + +// CHECK: vfnmadd213pd 291(%rax,%r14,8), %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x22,0xb5,0x00,0xac,0x84,0xf0,0x23,0x01,0x00,0x00] + vfnmadd213pd 291(%rax,%r14,8), %xmm25, %xmm24 + +// CHECK: vfnmadd213pd (%rcx){1to2}, %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x01] + vfnmadd213pd (%rcx){1to2}, %xmm25, %xmm24 + +// CHECK: vfnmadd213pd 2032(%rdx), %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x42,0x7f] + vfnmadd213pd 2032(%rdx), %xmm25, %xmm24 + +// CHECK: vfnmadd213pd 2048(%rdx), %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x82,0x00,0x08,0x00,0x00] + vfnmadd213pd 2048(%rdx), %xmm25, %xmm24 + +// CHECK: vfnmadd213pd -2048(%rdx), %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x42,0x80] + vfnmadd213pd -2048(%rdx), %xmm25, %xmm24 + +// CHECK: vfnmadd213pd -2064(%rdx), %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x82,0xf0,0xf7,0xff,0xff] + vfnmadd213pd -2064(%rdx), %xmm25, %xmm24 + +// CHECK: vfnmadd213pd 1016(%rdx){1to2}, %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x42,0x7f] + vfnmadd213pd 1016(%rdx){1to2}, %xmm25, %xmm24 + +// CHECK: vfnmadd213pd 1024(%rdx){1to2}, %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x82,0x00,0x04,0x00,0x00] + vfnmadd213pd 1024(%rdx){1to2}, %xmm25, %xmm24 + +// CHECK: vfnmadd213pd -1024(%rdx){1to2}, %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x42,0x80] + vfnmadd213pd -1024(%rdx){1to2}, %xmm25, %xmm24 + +// CHECK: vfnmadd213pd -1032(%rdx){1to2}, %xmm25, %xmm24 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x82,0xf8,0xfb,0xff,0xff] + vfnmadd213pd 
-1032(%rdx){1to2}, %xmm25, %xmm24 + +// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20 +// CHECK: encoding: [0x62,0x82,0xa5,0x20,0xac,0xe0] + vfnmadd213pd %ymm24, %ymm27, %ymm20 + +// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} +// CHECK: encoding: [0x62,0x82,0xa5,0x24,0xac,0xe0] + vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} + +// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0xa5,0xa4,0xac,0xe0] + vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} {z} + +// CHECK: vfnmadd213pd (%rcx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0x21] + vfnmadd213pd (%rcx), %ymm27, %ymm20 + +// CHECK: vfnmadd213pd 291(%rax,%r14,8), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xa2,0xa5,0x20,0xac,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd213pd 291(%rax,%r14,8), %ymm27, %ymm20 + +// CHECK: vfnmadd213pd (%rcx){1to4}, %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0x21] + vfnmadd213pd (%rcx){1to4}, %ymm27, %ymm20 + +// CHECK: vfnmadd213pd 4064(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0x62,0x7f] + vfnmadd213pd 4064(%rdx), %ymm27, %ymm20 + +// CHECK: vfnmadd213pd 4096(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0xa2,0x00,0x10,0x00,0x00] + vfnmadd213pd 4096(%rdx), %ymm27, %ymm20 + +// CHECK: vfnmadd213pd -4096(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0x62,0x80] + vfnmadd213pd -4096(%rdx), %ymm27, %ymm20 + +// CHECK: vfnmadd213pd -4128(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0xa2,0xe0,0xef,0xff,0xff] + vfnmadd213pd -4128(%rdx), %ymm27, %ymm20 + +// CHECK: vfnmadd213pd 1016(%rdx){1to4}, %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0x62,0x7f] + vfnmadd213pd 1016(%rdx){1to4}, %ymm27, %ymm20 + +// CHECK: vfnmadd213pd 1024(%rdx){1to4}, %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0xa2,0x00,0x04,0x00,0x00] + vfnmadd213pd 1024(%rdx){1to4}, %ymm27, %ymm20 + +// CHECK: vfnmadd213pd -1024(%rdx){1to4}, 
%ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0x62,0x80] + vfnmadd213pd -1024(%rdx){1to4}, %ymm27, %ymm20 + +// CHECK: vfnmadd213pd -1032(%rdx){1to4}, %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0xa2,0xf8,0xfb,0xff,0xff] + vfnmadd213pd -1032(%rdx){1to4}, %ymm27, %ymm20 + +// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18 +// CHECK: encoding: [0x62,0x82,0x2d,0x00,0xbc,0xd0] + vfnmadd231ps %xmm24, %xmm26, %xmm18 + +// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} +// CHECK: encoding: [0x62,0x82,0x2d,0x01,0xbc,0xd0] + vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} + +// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0x2d,0x81,0xbc,0xd0] + vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} {z} + +// CHECK: vfnmadd231ps (%rcx), %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x11] + vfnmadd231ps (%rcx), %xmm26, %xmm18 + +// CHECK: vfnmadd231ps 291(%rax,%r14,8), %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xa2,0x2d,0x00,0xbc,0x94,0xf0,0x23,0x01,0x00,0x00] + vfnmadd231ps 291(%rax,%r14,8), %xmm26, %xmm18 + +// CHECK: vfnmadd231ps (%rcx){1to4}, %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x11] + vfnmadd231ps (%rcx){1to4}, %xmm26, %xmm18 + +// CHECK: vfnmadd231ps 2032(%rdx), %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x52,0x7f] + vfnmadd231ps 2032(%rdx), %xmm26, %xmm18 + +// CHECK: vfnmadd231ps 2048(%rdx), %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x92,0x00,0x08,0x00,0x00] + vfnmadd231ps 2048(%rdx), %xmm26, %xmm18 + +// CHECK: vfnmadd231ps -2048(%rdx), %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x52,0x80] + vfnmadd231ps -2048(%rdx), %xmm26, %xmm18 + +// CHECK: vfnmadd231ps -2064(%rdx), %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x92,0xf0,0xf7,0xff,0xff] + vfnmadd231ps -2064(%rdx), %xmm26, %xmm18 + +// CHECK: vfnmadd231ps 508(%rdx){1to4}, %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x52,0x7f] + 
vfnmadd231ps 508(%rdx){1to4}, %xmm26, %xmm18 + +// CHECK: vfnmadd231ps 512(%rdx){1to4}, %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x92,0x00,0x02,0x00,0x00] + vfnmadd231ps 512(%rdx){1to4}, %xmm26, %xmm18 + +// CHECK: vfnmadd231ps -512(%rdx){1to4}, %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x52,0x80] + vfnmadd231ps -512(%rdx){1to4}, %xmm26, %xmm18 + +// CHECK: vfnmadd231ps -516(%rdx){1to4}, %xmm26, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x92,0xfc,0xfd,0xff,0xff] + vfnmadd231ps -516(%rdx){1to4}, %xmm26, %xmm18 + +// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xa2,0x5d,0x20,0xbc,0xe5] + vfnmadd231ps %ymm21, %ymm20, %ymm20 + +// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} +// CHECK: encoding: [0x62,0xa2,0x5d,0x24,0xbc,0xe5] + vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} + +// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x5d,0xa4,0xbc,0xe5] + vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} {z} + +// CHECK: vfnmadd231ps (%rcx), %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x21] + vfnmadd231ps (%rcx), %ymm20, %ymm20 + +// CHECK: vfnmadd231ps 291(%rax,%r14,8), %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xa2,0x5d,0x20,0xbc,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd231ps 291(%rax,%r14,8), %ymm20, %ymm20 + +// CHECK: vfnmadd231ps (%rcx){1to8}, %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x21] + vfnmadd231ps (%rcx){1to8}, %ymm20, %ymm20 + +// CHECK: vfnmadd231ps 4064(%rdx), %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x62,0x7f] + vfnmadd231ps 4064(%rdx), %ymm20, %ymm20 + +// CHECK: vfnmadd231ps 4096(%rdx), %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0xa2,0x00,0x10,0x00,0x00] + vfnmadd231ps 4096(%rdx), %ymm20, %ymm20 + +// CHECK: vfnmadd231ps -4096(%rdx), %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x62,0x80] + vfnmadd231ps -4096(%rdx), %ymm20, %ymm20 + +// CHECK: vfnmadd231ps 
-4128(%rdx), %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0xa2,0xe0,0xef,0xff,0xff] + vfnmadd231ps -4128(%rdx), %ymm20, %ymm20 + +// CHECK: vfnmadd231ps 508(%rdx){1to8}, %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x62,0x7f] + vfnmadd231ps 508(%rdx){1to8}, %ymm20, %ymm20 + +// CHECK: vfnmadd231ps 512(%rdx){1to8}, %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0xa2,0x00,0x02,0x00,0x00] + vfnmadd231ps 512(%rdx){1to8}, %ymm20, %ymm20 + +// CHECK: vfnmadd231ps -512(%rdx){1to8}, %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x62,0x80] + vfnmadd231ps -512(%rdx){1to8}, %ymm20, %ymm20 + +// CHECK: vfnmadd231ps -516(%rdx){1to8}, %ymm20, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0xa2,0xfc,0xfd,0xff,0xff] + vfnmadd231ps -516(%rdx){1to8}, %ymm20, %ymm20 + +// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x02,0xb5,0x00,0xbc,0xea] + vfnmadd231pd %xmm26, %xmm25, %xmm29 + +// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} +// CHECK: encoding: [0x62,0x02,0xb5,0x03,0xbc,0xea] + vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} + +// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0xb5,0x83,0xbc,0xea] + vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} {z} + +// CHECK: vfnmadd231pd (%rcx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0x29] + vfnmadd231pd (%rcx), %xmm25, %xmm29 + +// CHECK: vfnmadd231pd 291(%rax,%r14,8), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x22,0xb5,0x00,0xbc,0xac,0xf0,0x23,0x01,0x00,0x00] + vfnmadd231pd 291(%rax,%r14,8), %xmm25, %xmm29 + +// CHECK: vfnmadd231pd (%rcx){1to2}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0x29] + vfnmadd231pd (%rcx){1to2}, %xmm25, %xmm29 + +// CHECK: vfnmadd231pd 2032(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0x6a,0x7f] + vfnmadd231pd 2032(%rdx), %xmm25, %xmm29 + +// CHECK: vfnmadd231pd 2048(%rdx), %xmm25, %xmm29 +// CHECK: encoding: 
[0x62,0x62,0xb5,0x00,0xbc,0xaa,0x00,0x08,0x00,0x00] + vfnmadd231pd 2048(%rdx), %xmm25, %xmm29 + +// CHECK: vfnmadd231pd -2048(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0x6a,0x80] + vfnmadd231pd -2048(%rdx), %xmm25, %xmm29 + +// CHECK: vfnmadd231pd -2064(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0xaa,0xf0,0xf7,0xff,0xff] + vfnmadd231pd -2064(%rdx), %xmm25, %xmm29 + +// CHECK: vfnmadd231pd 1016(%rdx){1to2}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0x6a,0x7f] + vfnmadd231pd 1016(%rdx){1to2}, %xmm25, %xmm29 + +// CHECK: vfnmadd231pd 1024(%rdx){1to2}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0xaa,0x00,0x04,0x00,0x00] + vfnmadd231pd 1024(%rdx){1to2}, %xmm25, %xmm29 + +// CHECK: vfnmadd231pd -1024(%rdx){1to2}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0x6a,0x80] + vfnmadd231pd -1024(%rdx){1to2}, %xmm25, %xmm29 + +// CHECK: vfnmadd231pd -1032(%rdx){1to2}, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0xaa,0xf8,0xfb,0xff,0xff] + vfnmadd231pd -1032(%rdx){1to2}, %xmm25, %xmm29 + +// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x95,0x20,0xbc,0xf7] + vfnmadd231pd %ymm23, %ymm29, %ymm22 + +// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} +// CHECK: encoding: [0x62,0xa2,0x95,0x21,0xbc,0xf7] + vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} + +// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x95,0xa1,0xbc,0xf7] + vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} {z} + +// CHECK: vfnmadd231pd (%rcx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0x31] + vfnmadd231pd (%rcx), %ymm29, %ymm22 + +// CHECK: vfnmadd231pd 291(%rax,%r14,8), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x95,0x20,0xbc,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfnmadd231pd 291(%rax,%r14,8), %ymm29, %ymm22 + +// CHECK: vfnmadd231pd (%rcx){1to4}, %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0x31] + 
vfnmadd231pd (%rcx){1to4}, %ymm29, %ymm22 + +// CHECK: vfnmadd231pd 4064(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0x72,0x7f] + vfnmadd231pd 4064(%rdx), %ymm29, %ymm22 + +// CHECK: vfnmadd231pd 4096(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0xb2,0x00,0x10,0x00,0x00] + vfnmadd231pd 4096(%rdx), %ymm29, %ymm22 + +// CHECK: vfnmadd231pd -4096(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0x72,0x80] + vfnmadd231pd -4096(%rdx), %ymm29, %ymm22 + +// CHECK: vfnmadd231pd -4128(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0xb2,0xe0,0xef,0xff,0xff] + vfnmadd231pd -4128(%rdx), %ymm29, %ymm22 + +// CHECK: vfnmadd231pd 1016(%rdx){1to4}, %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0x72,0x7f] + vfnmadd231pd 1016(%rdx){1to4}, %ymm29, %ymm22 + +// CHECK: vfnmadd231pd 1024(%rdx){1to4}, %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0xb2,0x00,0x04,0x00,0x00] + vfnmadd231pd 1024(%rdx){1to4}, %ymm29, %ymm22 + +// CHECK: vfnmadd231pd -1024(%rdx){1to4}, %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0x72,0x80] + vfnmadd231pd -1024(%rdx){1to4}, %ymm29, %ymm22 + +// CHECK: vfnmadd231pd -1032(%rdx){1to4}, %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0xb2,0xf8,0xfb,0xff,0xff] + vfnmadd231pd -1032(%rdx){1to4}, %ymm29, %ymm22 + +// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0x82,0x35,0x00,0x9e,0xea] + vfnmsub132ps %xmm26, %xmm25, %xmm21 + +// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} +// CHECK: encoding: [0x62,0x82,0x35,0x03,0x9e,0xea] + vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} + +// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0x35,0x83,0x9e,0xea] + vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} {z} + +// CHECK: vfnmsub132ps (%rcx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0x29] + vfnmsub132ps (%rcx), %xmm25, %xmm21 + +// CHECK: vfnmsub132ps 
291(%rax,%r14,8), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x35,0x00,0x9e,0xac,0xf0,0x23,0x01,0x00,0x00] + vfnmsub132ps 291(%rax,%r14,8), %xmm25, %xmm21 + +// CHECK: vfnmsub132ps (%rcx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0x29] + vfnmsub132ps (%rcx){1to4}, %xmm25, %xmm21 + +// CHECK: vfnmsub132ps 2032(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0x6a,0x7f] + vfnmsub132ps 2032(%rdx), %xmm25, %xmm21 + +// CHECK: vfnmsub132ps 2048(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0xaa,0x00,0x08,0x00,0x00] + vfnmsub132ps 2048(%rdx), %xmm25, %xmm21 + +// CHECK: vfnmsub132ps -2048(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0x6a,0x80] + vfnmsub132ps -2048(%rdx), %xmm25, %xmm21 + +// CHECK: vfnmsub132ps -2064(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0xaa,0xf0,0xf7,0xff,0xff] + vfnmsub132ps -2064(%rdx), %xmm25, %xmm21 + +// CHECK: vfnmsub132ps 508(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0x6a,0x7f] + vfnmsub132ps 508(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfnmsub132ps 512(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0xaa,0x00,0x02,0x00,0x00] + vfnmsub132ps 512(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfnmsub132ps -512(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0x6a,0x80] + vfnmsub132ps -512(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfnmsub132ps -516(%rdx){1to4}, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0xaa,0xfc,0xfd,0xff,0xff] + vfnmsub132ps -516(%rdx){1to4}, %xmm25, %xmm21 + +// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0x9e,0xd6] + vfnmsub132ps %ymm22, %ymm24, %ymm18 + +// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} +// CHECK: encoding: [0x62,0xa2,0x3d,0x25,0x9e,0xd6] + vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} + +// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} {z} +// 
CHECK: encoding: [0x62,0xa2,0x3d,0xa5,0x9e,0xd6] + vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} {z} + +// CHECK: vfnmsub132ps (%rcx), %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x11] + vfnmsub132ps (%rcx), %ymm24, %ymm18 + +// CHECK: vfnmsub132ps 291(%rax,%r14,8), %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0x9e,0x94,0xf0,0x23,0x01,0x00,0x00] + vfnmsub132ps 291(%rax,%r14,8), %ymm24, %ymm18 + +// CHECK: vfnmsub132ps (%rcx){1to8}, %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x11] + vfnmsub132ps (%rcx){1to8}, %ymm24, %ymm18 + +// CHECK: vfnmsub132ps 4064(%rdx), %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x52,0x7f] + vfnmsub132ps 4064(%rdx), %ymm24, %ymm18 + +// CHECK: vfnmsub132ps 4096(%rdx), %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x92,0x00,0x10,0x00,0x00] + vfnmsub132ps 4096(%rdx), %ymm24, %ymm18 + +// CHECK: vfnmsub132ps -4096(%rdx), %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x52,0x80] + vfnmsub132ps -4096(%rdx), %ymm24, %ymm18 + +// CHECK: vfnmsub132ps -4128(%rdx), %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x92,0xe0,0xef,0xff,0xff] + vfnmsub132ps -4128(%rdx), %ymm24, %ymm18 + +// CHECK: vfnmsub132ps 508(%rdx){1to8}, %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x52,0x7f] + vfnmsub132ps 508(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vfnmsub132ps 512(%rdx){1to8}, %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x92,0x00,0x02,0x00,0x00] + vfnmsub132ps 512(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vfnmsub132ps -512(%rdx){1to8}, %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x52,0x80] + vfnmsub132ps -512(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vfnmsub132ps -516(%rdx){1to8}, %ymm24, %ymm18 +// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x92,0xfc,0xfd,0xff,0xff] + vfnmsub132ps -516(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19 +// CHECK: encoding: 
[0x62,0xa2,0xb5,0x00,0x9e,0xd9] + vfnmsub132pd %xmm17, %xmm25, %xmm19 + +// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} +// CHECK: encoding: [0x62,0xa2,0xb5,0x04,0x9e,0xd9] + vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} + +// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0xb5,0x84,0x9e,0xd9] + vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} {z} + +// CHECK: vfnmsub132pd (%rcx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x19] + vfnmsub132pd (%rcx), %xmm25, %xmm19 + +// CHECK: vfnmsub132pd 291(%rax,%r14,8), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xa2,0xb5,0x00,0x9e,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfnmsub132pd 291(%rax,%r14,8), %xmm25, %xmm19 + +// CHECK: vfnmsub132pd (%rcx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x19] + vfnmsub132pd (%rcx){1to2}, %xmm25, %xmm19 + +// CHECK: vfnmsub132pd 2032(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x5a,0x7f] + vfnmsub132pd 2032(%rdx), %xmm25, %xmm19 + +// CHECK: vfnmsub132pd 2048(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x9a,0x00,0x08,0x00,0x00] + vfnmsub132pd 2048(%rdx), %xmm25, %xmm19 + +// CHECK: vfnmsub132pd -2048(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x5a,0x80] + vfnmsub132pd -2048(%rdx), %xmm25, %xmm19 + +// CHECK: vfnmsub132pd -2064(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x9a,0xf0,0xf7,0xff,0xff] + vfnmsub132pd -2064(%rdx), %xmm25, %xmm19 + +// CHECK: vfnmsub132pd 1016(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x5a,0x7f] + vfnmsub132pd 1016(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vfnmsub132pd 1024(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x9a,0x00,0x04,0x00,0x00] + vfnmsub132pd 1024(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vfnmsub132pd -1024(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x5a,0x80] + vfnmsub132pd 
-1024(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vfnmsub132pd -1032(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x9a,0xf8,0xfb,0xff,0xff] + vfnmsub132pd -1032(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xa2,0x9d,0x20,0x9e,0xce] + vfnmsub132pd %ymm22, %ymm28, %ymm17 + +// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} +// CHECK: encoding: [0x62,0xa2,0x9d,0x25,0x9e,0xce] + vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} + +// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x9d,0xa5,0x9e,0xce] + vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} {z} + +// CHECK: vfnmsub132pd (%rcx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x09] + vfnmsub132pd (%rcx), %ymm28, %ymm17 + +// CHECK: vfnmsub132pd 291(%rax,%r14,8), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xa2,0x9d,0x20,0x9e,0x8c,0xf0,0x23,0x01,0x00,0x00] + vfnmsub132pd 291(%rax,%r14,8), %ymm28, %ymm17 + +// CHECK: vfnmsub132pd (%rcx){1to4}, %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x09] + vfnmsub132pd (%rcx){1to4}, %ymm28, %ymm17 + +// CHECK: vfnmsub132pd 4064(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x4a,0x7f] + vfnmsub132pd 4064(%rdx), %ymm28, %ymm17 + +// CHECK: vfnmsub132pd 4096(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x8a,0x00,0x10,0x00,0x00] + vfnmsub132pd 4096(%rdx), %ymm28, %ymm17 + +// CHECK: vfnmsub132pd -4096(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x4a,0x80] + vfnmsub132pd -4096(%rdx), %ymm28, %ymm17 + +// CHECK: vfnmsub132pd -4128(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x8a,0xe0,0xef,0xff,0xff] + vfnmsub132pd -4128(%rdx), %ymm28, %ymm17 + +// CHECK: vfnmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x4a,0x7f] + vfnmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm17 + +// CHECK: vfnmsub132pd 1024(%rdx){1to4}, 
%ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x8a,0x00,0x04,0x00,0x00] + vfnmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm17 + +// CHECK: vfnmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x4a,0x80] + vfnmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm17 + +// CHECK: vfnmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x8a,0xf8,0xfb,0xff,0xff] + vfnmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm17 + +// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x22,0x1d,0x00,0xae,0xe2] + vfnmsub213ps %xmm18, %xmm28, %xmm28 + +// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} +// CHECK: encoding: [0x62,0x22,0x1d,0x04,0xae,0xe2] + vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} + +// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x1d,0x84,0xae,0xe2] + vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} {z} + +// CHECK: vfnmsub213ps (%rcx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0x21] + vfnmsub213ps (%rcx), %xmm28, %xmm28 + +// CHECK: vfnmsub213ps 291(%rax,%r14,8), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x22,0x1d,0x00,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmsub213ps 291(%rax,%r14,8), %xmm28, %xmm28 + +// CHECK: vfnmsub213ps (%rcx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0x21] + vfnmsub213ps (%rcx){1to4}, %xmm28, %xmm28 + +// CHECK: vfnmsub213ps 2032(%rdx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0x62,0x7f] + vfnmsub213ps 2032(%rdx), %xmm28, %xmm28 + +// CHECK: vfnmsub213ps 2048(%rdx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0xa2,0x00,0x08,0x00,0x00] + vfnmsub213ps 2048(%rdx), %xmm28, %xmm28 + +// CHECK: vfnmsub213ps -2048(%rdx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0x62,0x80] + vfnmsub213ps -2048(%rdx), %xmm28, %xmm28 + +// CHECK: vfnmsub213ps -2064(%rdx), %xmm28, %xmm28 +// CHECK: encoding: 
[0x62,0x62,0x1d,0x00,0xae,0xa2,0xf0,0xf7,0xff,0xff] + vfnmsub213ps -2064(%rdx), %xmm28, %xmm28 + +// CHECK: vfnmsub213ps 508(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0x62,0x7f] + vfnmsub213ps 508(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vfnmsub213ps 512(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0xa2,0x00,0x02,0x00,0x00] + vfnmsub213ps 512(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vfnmsub213ps -512(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0x62,0x80] + vfnmsub213ps -512(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vfnmsub213ps -516(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0xa2,0xfc,0xfd,0xff,0xff] + vfnmsub213ps -516(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xa2,0x35,0x20,0xae,0xe7] + vfnmsub213ps %ymm23, %ymm25, %ymm20 + +// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} +// CHECK: encoding: [0x62,0xa2,0x35,0x21,0xae,0xe7] + vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} + +// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x35,0xa1,0xae,0xe7] + vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} {z} + +// CHECK: vfnmsub213ps (%rcx), %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0x21] + vfnmsub213ps (%rcx), %ymm25, %ymm20 + +// CHECK: vfnmsub213ps 291(%rax,%r14,8), %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xa2,0x35,0x20,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmsub213ps 291(%rax,%r14,8), %ymm25, %ymm20 + +// CHECK: vfnmsub213ps (%rcx){1to8}, %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0x21] + vfnmsub213ps (%rcx){1to8}, %ymm25, %ymm20 + +// CHECK: vfnmsub213ps 4064(%rdx), %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0x62,0x7f] + vfnmsub213ps 4064(%rdx), %ymm25, %ymm20 + +// CHECK: vfnmsub213ps 4096(%rdx), %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0xa2,0x00,0x10,0x00,0x00] + 
vfnmsub213ps 4096(%rdx), %ymm25, %ymm20 + +// CHECK: vfnmsub213ps -4096(%rdx), %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0x62,0x80] + vfnmsub213ps -4096(%rdx), %ymm25, %ymm20 + +// CHECK: vfnmsub213ps -4128(%rdx), %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0xa2,0xe0,0xef,0xff,0xff] + vfnmsub213ps -4128(%rdx), %ymm25, %ymm20 + +// CHECK: vfnmsub213ps 508(%rdx){1to8}, %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0x62,0x7f] + vfnmsub213ps 508(%rdx){1to8}, %ymm25, %ymm20 + +// CHECK: vfnmsub213ps 512(%rdx){1to8}, %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0xa2,0x00,0x02,0x00,0x00] + vfnmsub213ps 512(%rdx){1to8}, %ymm25, %ymm20 + +// CHECK: vfnmsub213ps -512(%rdx){1to8}, %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0x62,0x80] + vfnmsub213ps -512(%rdx){1to8}, %ymm25, %ymm20 + +// CHECK: vfnmsub213ps -516(%rdx){1to8}, %ymm25, %ymm20 +// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0xa2,0xfc,0xfd,0xff,0xff] + vfnmsub213ps -516(%rdx){1to8}, %ymm25, %ymm20 + +// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20 +// CHECK: encoding: [0x62,0x82,0xf5,0x00,0xae,0xe1] + vfnmsub213pd %xmm25, %xmm17, %xmm20 + +// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} +// CHECK: encoding: [0x62,0x82,0xf5,0x01,0xae,0xe1] + vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} + +// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0xf5,0x81,0xae,0xe1] + vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} {z} + +// CHECK: vfnmsub213pd (%rcx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0x21] + vfnmsub213pd (%rcx), %xmm17, %xmm20 + +// CHECK: vfnmsub213pd 291(%rax,%r14,8), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xa2,0xf5,0x00,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00] + vfnmsub213pd 291(%rax,%r14,8), %xmm17, %xmm20 + +// CHECK: vfnmsub213pd (%rcx){1to2}, %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0x21] + vfnmsub213pd (%rcx){1to2}, %xmm17, %xmm20 + +// CHECK: 
vfnmsub213pd 2032(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0x62,0x7f] + vfnmsub213pd 2032(%rdx), %xmm17, %xmm20 + +// CHECK: vfnmsub213pd 2048(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0xa2,0x00,0x08,0x00,0x00] + vfnmsub213pd 2048(%rdx), %xmm17, %xmm20 + +// CHECK: vfnmsub213pd -2048(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0x62,0x80] + vfnmsub213pd -2048(%rdx), %xmm17, %xmm20 + +// CHECK: vfnmsub213pd -2064(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0xa2,0xf0,0xf7,0xff,0xff] + vfnmsub213pd -2064(%rdx), %xmm17, %xmm20 + +// CHECK: vfnmsub213pd 1016(%rdx){1to2}, %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0x62,0x7f] + vfnmsub213pd 1016(%rdx){1to2}, %xmm17, %xmm20 + +// CHECK: vfnmsub213pd 1024(%rdx){1to2}, %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0xa2,0x00,0x04,0x00,0x00] + vfnmsub213pd 1024(%rdx){1to2}, %xmm17, %xmm20 + +// CHECK: vfnmsub213pd -1024(%rdx){1to2}, %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0x62,0x80] + vfnmsub213pd -1024(%rdx){1to2}, %xmm17, %xmm20 + +// CHECK: vfnmsub213pd -1032(%rdx){1to2}, %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0xa2,0xf8,0xfb,0xff,0xff] + vfnmsub213pd -1032(%rdx){1to2}, %xmm17, %xmm20 + +// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19 +// CHECK: encoding: [0x62,0x82,0xdd,0x20,0xae,0xdc] + vfnmsub213pd %ymm28, %ymm20, %ymm19 + +// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} +// CHECK: encoding: [0x62,0x82,0xdd,0x27,0xae,0xdc] + vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} + +// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0xdd,0xa7,0xae,0xdc] + vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} {z} + +// CHECK: vfnmsub213pd (%rcx), %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x19] + vfnmsub213pd (%rcx), %ymm20, %ymm19 + +// CHECK: vfnmsub213pd 291(%rax,%r14,8), %ymm20, %ymm19 +// CHECK: encoding: 
[0x62,0xa2,0xdd,0x20,0xae,0x9c,0xf0,0x23,0x01,0x00,0x00] + vfnmsub213pd 291(%rax,%r14,8), %ymm20, %ymm19 + +// CHECK: vfnmsub213pd (%rcx){1to4}, %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x19] + vfnmsub213pd (%rcx){1to4}, %ymm20, %ymm19 + +// CHECK: vfnmsub213pd 4064(%rdx), %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x5a,0x7f] + vfnmsub213pd 4064(%rdx), %ymm20, %ymm19 + +// CHECK: vfnmsub213pd 4096(%rdx), %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x9a,0x00,0x10,0x00,0x00] + vfnmsub213pd 4096(%rdx), %ymm20, %ymm19 + +// CHECK: vfnmsub213pd -4096(%rdx), %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x5a,0x80] + vfnmsub213pd -4096(%rdx), %ymm20, %ymm19 + +// CHECK: vfnmsub213pd -4128(%rdx), %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x9a,0xe0,0xef,0xff,0xff] + vfnmsub213pd -4128(%rdx), %ymm20, %ymm19 + +// CHECK: vfnmsub213pd 1016(%rdx){1to4}, %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x5a,0x7f] + vfnmsub213pd 1016(%rdx){1to4}, %ymm20, %ymm19 + +// CHECK: vfnmsub213pd 1024(%rdx){1to4}, %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x9a,0x00,0x04,0x00,0x00] + vfnmsub213pd 1024(%rdx){1to4}, %ymm20, %ymm19 + +// CHECK: vfnmsub213pd -1024(%rdx){1to4}, %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x5a,0x80] + vfnmsub213pd -1024(%rdx){1to4}, %ymm20, %ymm19 + +// CHECK: vfnmsub213pd -1032(%rdx){1to4}, %ymm20, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x9a,0xf8,0xfb,0xff,0xff] + vfnmsub213pd -1032(%rdx){1to4}, %ymm20, %ymm19 + +// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18 +// CHECK: encoding: [0x62,0x82,0x25,0x00,0xbe,0xd2] + vfnmsub231ps %xmm26, %xmm27, %xmm18 + +// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} +// CHECK: encoding: [0x62,0x82,0x25,0x02,0xbe,0xd2] + vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} + +// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} {z} +// CHECK: encoding: 
[0x62,0x82,0x25,0x82,0xbe,0xd2] + vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} {z} + +// CHECK: vfnmsub231ps (%rcx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x11] + vfnmsub231ps (%rcx), %xmm27, %xmm18 + +// CHECK: vfnmsub231ps 291(%rax,%r14,8), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xa2,0x25,0x00,0xbe,0x94,0xf0,0x23,0x01,0x00,0x00] + vfnmsub231ps 291(%rax,%r14,8), %xmm27, %xmm18 + +// CHECK: vfnmsub231ps (%rcx){1to4}, %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x11] + vfnmsub231ps (%rcx){1to4}, %xmm27, %xmm18 + +// CHECK: vfnmsub231ps 2032(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x52,0x7f] + vfnmsub231ps 2032(%rdx), %xmm27, %xmm18 + +// CHECK: vfnmsub231ps 2048(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x92,0x00,0x08,0x00,0x00] + vfnmsub231ps 2048(%rdx), %xmm27, %xmm18 + +// CHECK: vfnmsub231ps -2048(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x52,0x80] + vfnmsub231ps -2048(%rdx), %xmm27, %xmm18 + +// CHECK: vfnmsub231ps -2064(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x92,0xf0,0xf7,0xff,0xff] + vfnmsub231ps -2064(%rdx), %xmm27, %xmm18 + +// CHECK: vfnmsub231ps 508(%rdx){1to4}, %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x52,0x7f] + vfnmsub231ps 508(%rdx){1to4}, %xmm27, %xmm18 + +// CHECK: vfnmsub231ps 512(%rdx){1to4}, %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x92,0x00,0x02,0x00,0x00] + vfnmsub231ps 512(%rdx){1to4}, %xmm27, %xmm18 + +// CHECK: vfnmsub231ps -512(%rdx){1to4}, %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x52,0x80] + vfnmsub231ps -512(%rdx){1to4}, %xmm27, %xmm18 + +// CHECK: vfnmsub231ps -516(%rdx){1to4}, %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x92,0xfc,0xfd,0xff,0xff] + vfnmsub231ps -516(%rdx){1to4}, %xmm27, %xmm18 + +// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x22,0x5d,0x20,0xbe,0xf2] + 
vfnmsub231ps %ymm18, %ymm20, %ymm30 + +// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} +// CHECK: encoding: [0x62,0x22,0x5d,0x21,0xbe,0xf2] + vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} + +// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} {z} +// CHECK: encoding: [0x62,0x22,0x5d,0xa1,0xbe,0xf2] + vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} {z} + +// CHECK: vfnmsub231ps (%rcx), %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0x31] + vfnmsub231ps (%rcx), %ymm20, %ymm30 + +// CHECK: vfnmsub231ps 291(%rax,%r14,8), %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x22,0x5d,0x20,0xbe,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfnmsub231ps 291(%rax,%r14,8), %ymm20, %ymm30 + +// CHECK: vfnmsub231ps (%rcx){1to8}, %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0x31] + vfnmsub231ps (%rcx){1to8}, %ymm20, %ymm30 + +// CHECK: vfnmsub231ps 4064(%rdx), %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0x72,0x7f] + vfnmsub231ps 4064(%rdx), %ymm20, %ymm30 + +// CHECK: vfnmsub231ps 4096(%rdx), %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0xb2,0x00,0x10,0x00,0x00] + vfnmsub231ps 4096(%rdx), %ymm20, %ymm30 + +// CHECK: vfnmsub231ps -4096(%rdx), %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0x72,0x80] + vfnmsub231ps -4096(%rdx), %ymm20, %ymm30 + +// CHECK: vfnmsub231ps -4128(%rdx), %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0xb2,0xe0,0xef,0xff,0xff] + vfnmsub231ps -4128(%rdx), %ymm20, %ymm30 + +// CHECK: vfnmsub231ps 508(%rdx){1to8}, %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0x72,0x7f] + vfnmsub231ps 508(%rdx){1to8}, %ymm20, %ymm30 + +// CHECK: vfnmsub231ps 512(%rdx){1to8}, %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0xb2,0x00,0x02,0x00,0x00] + vfnmsub231ps 512(%rdx){1to8}, %ymm20, %ymm30 + +// CHECK: vfnmsub231ps -512(%rdx){1to8}, %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0x72,0x80] + vfnmsub231ps -512(%rdx){1to8}, %ymm20, %ymm30 + +// CHECK: 
vfnmsub231ps -516(%rdx){1to8}, %ymm20, %ymm30 +// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0xb2,0xfc,0xfd,0xff,0xff] + vfnmsub231ps -516(%rdx){1to8}, %ymm20, %ymm30 + +// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23 +// CHECK: encoding: [0x62,0x82,0xe5,0x00,0xbe,0xf9] + vfnmsub231pd %xmm25, %xmm19, %xmm23 + +// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} +// CHECK: encoding: [0x62,0x82,0xe5,0x03,0xbe,0xf9] + vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} + +// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0xe5,0x83,0xbe,0xf9] + vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} {z} + +// CHECK: vfnmsub231pd (%rcx), %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x39] + vfnmsub231pd (%rcx), %xmm19, %xmm23 + +// CHECK: vfnmsub231pd 291(%rax,%r14,8), %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xa2,0xe5,0x00,0xbe,0xbc,0xf0,0x23,0x01,0x00,0x00] + vfnmsub231pd 291(%rax,%r14,8), %xmm19, %xmm23 + +// CHECK: vfnmsub231pd (%rcx){1to2}, %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x39] + vfnmsub231pd (%rcx){1to2}, %xmm19, %xmm23 + +// CHECK: vfnmsub231pd 2032(%rdx), %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x7a,0x7f] + vfnmsub231pd 2032(%rdx), %xmm19, %xmm23 + +// CHECK: vfnmsub231pd 2048(%rdx), %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0xba,0x00,0x08,0x00,0x00] + vfnmsub231pd 2048(%rdx), %xmm19, %xmm23 + +// CHECK: vfnmsub231pd -2048(%rdx), %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x7a,0x80] + vfnmsub231pd -2048(%rdx), %xmm19, %xmm23 + +// CHECK: vfnmsub231pd -2064(%rdx), %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0xba,0xf0,0xf7,0xff,0xff] + vfnmsub231pd -2064(%rdx), %xmm19, %xmm23 + +// CHECK: vfnmsub231pd 1016(%rdx){1to2}, %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x7a,0x7f] + vfnmsub231pd 1016(%rdx){1to2}, %xmm19, %xmm23 + +// CHECK: vfnmsub231pd 1024(%rdx){1to2}, %xmm19, %xmm23 +// CHECK: encoding: 
[0x62,0xe2,0xe5,0x10,0xbe,0xba,0x00,0x04,0x00,0x00] + vfnmsub231pd 1024(%rdx){1to2}, %xmm19, %xmm23 + +// CHECK: vfnmsub231pd -1024(%rdx){1to2}, %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x7a,0x80] + vfnmsub231pd -1024(%rdx){1to2}, %xmm19, %xmm23 + +// CHECK: vfnmsub231pd -1032(%rdx){1to2}, %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0xba,0xf8,0xfb,0xff,0xff] + vfnmsub231pd -1032(%rdx){1to2}, %xmm19, %xmm23 + +// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xbe,0xf4] + vfnmsub231pd %ymm20, %ymm18, %ymm22 + +// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} +// CHECK: encoding: [0x62,0xa2,0xed,0x21,0xbe,0xf4] + vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} + +// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0xed,0xa1,0xbe,0xf4] + vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} {z} + +// CHECK: vfnmsub231pd (%rcx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0x31] + vfnmsub231pd (%rcx), %ymm18, %ymm22 + +// CHECK: vfnmsub231pd 291(%rax,%r14,8), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xbe,0xb4,0xf0,0x23,0x01,0x00,0x00] + vfnmsub231pd 291(%rax,%r14,8), %ymm18, %ymm22 + +// CHECK: vfnmsub231pd (%rcx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0x31] + vfnmsub231pd (%rcx){1to4}, %ymm18, %ymm22 + +// CHECK: vfnmsub231pd 4064(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0x72,0x7f] + vfnmsub231pd 4064(%rdx), %ymm18, %ymm22 + +// CHECK: vfnmsub231pd 4096(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0xb2,0x00,0x10,0x00,0x00] + vfnmsub231pd 4096(%rdx), %ymm18, %ymm22 + +// CHECK: vfnmsub231pd -4096(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0x72,0x80] + vfnmsub231pd -4096(%rdx), %ymm18, %ymm22 + +// CHECK: vfnmsub231pd -4128(%rdx), %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0xb2,0xe0,0xef,0xff,0xff] + vfnmsub231pd 
-4128(%rdx), %ymm18, %ymm22 + +// CHECK: vfnmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0x72,0x7f] + vfnmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm22 + +// CHECK: vfnmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0xb2,0x00,0x04,0x00,0x00] + vfnmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm22 + +// CHECK: vfnmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0x72,0x80] + vfnmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm22 + +// CHECK: vfnmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0xb2,0xf8,0xfb,0xff,0xff] + vfnmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm22 + // CHECK: vpermi2d %xmm25, %xmm23, %xmm21 // CHECK: encoding: [0x62,0x82,0x45,0x00,0x76,0xe9] vpermi2d %xmm25, %xmm23, %xmm21