Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
- def int_x86_fma_mask_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
+
def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
- def int_x86_fma_mask_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubps128_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
- def int_x86_fma_mask_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
- def int_x86_fma_mask_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
- def int_x86_fma_mask_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
- def int_x86_fma_mask_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsubadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsubadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsubadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_fma_mask_vfmsubadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
+ // Masked vfmadd intrinsics (mask, mask3, maskz) for 128/256/512-bit pd and ps vectors: three vector operands of the result type, then an integer mask; all are IntrNoMem.
+ def int_x86_avx512_mask_vfmadd_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // NOTE(review): the mask3 flavour presumably keeps the third operand in masked-off lanes -- confirm against the intrinsic lowering.
+ def int_x86_avx512_mask3_vfmadd_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // NOTE(review): the maskz flavour presumably zeroes masked-off lanes -- confirm against the intrinsic lowering.
+ def int_x86_avx512_maskz_vfmadd_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd128_maskz">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfmadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask3">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd256_maskz">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit forms take an extra trailing i32 operand (presumably the rounding mode -- confirm against the lowering code).
+ def int_x86_avx512_mask_vfmadd_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmadd_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask3">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmadd_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddpd512_maskz">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // Single-precision (ps) variants of the same three flavours.
+ def int_x86_avx512_mask_vfmadd_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmadd_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps128_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmadd_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps128_maskz">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfmadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps256_mask3">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps256_maskz">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit single-precision forms: i16 mask for the 16-element v16f32 type, plus the trailing i32 operand.
+ def int_x86_avx512_mask_vfmadd_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmadd_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps512_mask3">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmadd_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddps512_maskz">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // Masked vfmaddsub intrinsics (mask, mask3, maskz) for 128/256/512-bit pd and ps vectors: three vector operands of the result type, then an integer mask; all are IntrNoMem.
+ def int_x86_avx512_mask_vfmaddsub_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmaddsub_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmaddsub_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_maskz">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfmaddsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmaddsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask3">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmaddsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_maskz">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit forms take an extra trailing i32 operand (presumably the rounding mode -- confirm against the lowering code).
+ def int_x86_avx512_mask_vfmaddsub_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmaddsub_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask3">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmaddsub_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_maskz">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // Single-precision (ps) variants of the same three flavours.
+ def int_x86_avx512_mask_vfmaddsub_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmaddsub_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmaddsub_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps128_maskz">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfmaddsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmaddsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask3">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmaddsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps256_maskz">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit single-precision forms: i16 mask for the 16-element v16f32 type, plus the trailing i32 operand.
+ def int_x86_avx512_mask_vfmaddsub_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmaddsub_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask3">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vfmaddsub_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps512_maskz">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // vfmsub intrinsics: only the mask3 flavour is defined here, for 128/256/512-bit pd and ps vectors (three vector operands, then an integer mask).
+ def int_x86_avx512_mask3_vfmsub_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask3">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit forms take an extra trailing i32 operand (presumably the rounding mode -- confirm against the lowering code).
+ def int_x86_avx512_mask3_vfmsub_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask3">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // Single-precision (ps) variants.
+ def int_x86_avx512_mask3_vfmsub_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmsubps128_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubps256_mask3">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // The 512-bit ps form uses an i16 mask for the 16-element v16f32 type.
+ def int_x86_avx512_mask3_vfmsub_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmsubps512_mask3">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // vfmsubadd intrinsics: only the mask3 flavour is defined here, for 128/256/512-bit pd and ps vectors (three vector operands, then an integer mask).
+ def int_x86_avx512_mask3_vfmsubadd_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmsubadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask3">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit forms take an extra trailing i32 operand (presumably the rounding mode -- confirm against the lowering code).
+ def int_x86_avx512_mask3_vfmsubadd_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask3">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // Single-precision (ps) variants.
+ def int_x86_avx512_mask3_vfmsubadd_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmsubadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask3">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // The 512-bit ps form uses an i16 mask for the 16-element v16f32 type.
+ def int_x86_avx512_mask3_vfmsubadd_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask3">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // vfnmadd intrinsics: only the mask flavour is defined here, for 128/256/512-bit pd and ps vectors (three vector operands, then an integer mask).
+ def int_x86_avx512_mask_vfnmadd_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfnmadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit forms take an extra trailing i32 operand (presumably the rounding mode -- confirm against the lowering code).
+ def int_x86_avx512_mask_vfnmadd_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // Single-precision (ps) variants.
+ def int_x86_avx512_mask_vfnmadd_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfnmadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // The 512-bit ps form uses an i16 mask for the 16-element v16f32 type.
+ def int_x86_avx512_mask_vfnmadd_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // vfnmsub intrinsics: mask and mask3 flavours (no maskz here) for 128/256/512-bit pd and ps vectors (three vector operands, then an integer mask).
+ def int_x86_avx512_mask_vfnmsub_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfnmsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask3">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // 512-bit forms take an extra trailing i32 operand (presumably the rounding mode -- confirm against the lowering code).
+ def int_x86_avx512_mask_vfnmsub_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask3">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ // Single-precision (ps) variants.
+ def int_x86_avx512_mask_vfnmsub_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vfnmsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask3">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ // The 512-bit ps forms use an i16 mask for the 16-element v16f32 type.
+ def int_x86_avx512_mask_vfnmsub_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask3">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
}
//===----------------------------------------------------------------------===//
Mask, PassThru, Subtarget, DAG);
}
case VPERM_3OP_MASKZ:
- case VPERM_3OP_MASK:
+ case VPERM_3OP_MASK:
+ case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
EVT VT = Op.getValueType();
- SDValue PassThru =
- (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) ?
- getZeroVector(VT, Subtarget, DAG, dl) : Src1;
+ SDValue PassThru = SDValue();
+ // Pick PassThru from the intrinsic kind: a zero vector for the two *_MASKZ
+ // kinds, Src3 for FMA_OP_MASK3, and Src1 for every other kind handled here.
+ if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+ else if (IntrData->Type == FMA_OP_MASK3)
+ PassThru = Src3;
+ else
+ PassThru = Src1;
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
//
let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- SDPatternOperator OpNode = null_frag> {
+multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> { // 213 form: every pattern is OpNode(src1, src2, src3); emits r (register), m (vector load) and mb (broadcast scalar load) variants
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
AVX512FMA3Base;
- let mayLoad = 1 in
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ let mayLoad = 1 in { // both memory variants below read $src3 from memory
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
AVX512FMA3Base;
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), // $src3 is a scalar load fed through X86VBroadcast; marked EVEX_B
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
- }
-} // Constraints = "$src1 = $dst"
+ }
+}
-let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _,
- SDPatternOperator OpNode> {
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> { // register-only rb variant: adds an AVX512RC:$rc operand that is passed to OpNode as an i32 immediate; marked EVEX_B and EVEX_RC
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
- }
+}
} // Constraints = "$src1 = $dst"
-multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
- X86VectorVTInfo VTI, SDPatternOperator OpNode> {
- defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
- VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { // instantiates the 213 form for the 512/256/128-bit infos of one element type
+ let Predicates = [HasAVX512] in { // 512-bit: r/m/mb variants plus the rounding (rb) variant built from OpNodeRnd
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasAVX512] in { // 256/128-bit: r/m/mb variants only, no rounding variant
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
+ }
}
-multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
- string OpcodeStr, X86VectorVTInfo VTI,
- SDPatternOperator OpNode> {
- defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
- VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
- defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
- VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > { // expands one 213 opcode into PS (f32 info, "ps" suffix) and PD (f64 info, "pd" suffix, VEX_W) families
+ defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
}
-multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
- string OpcodeStr,
- SDPatternOperator OpNode,
- SDPatternOperator OpNodeRnd> {
-let ExeDomain = SSEPackedSingle in {
- defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v16f32_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr,
- v16f32_info, OpNodeRnd>, EVEX_V512;
- defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v8f32x_info, OpNode>, EVEX_V256;
- defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f32x_info, OpNode>, EVEX_V128;
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; // arguments: opcode byte, mnemonic stem, SDNode, rounding SDNode
+defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
+ // 231 form: every pattern below is OpNode(src2, src3, src1), with $src1 tied to $dst by the Constraints.
+ // avx512_fma3p_231_rm emits r/m/mb variants; avx512_fma3_231_round emits the rb variant with a rounding-control operand.
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
+ AVX512FMA3Base;
+ // Memory variants: $src3 is loaded, either as a whole vector (m) or as a broadcast scalar (mb).
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
+ AVX512FMA3Base;
+ // Broadcast variant: scalar load fed through X86VBroadcast; marked EVEX_B.
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
+ "$src2, ${src3}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src2,
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src1))>, AVX512FMA3Base, EVEX_B;
}
-let ExeDomain = SSEPackedDouble in {
- defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v8f64_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
- OpNodeRnd>, EVEX_V512, VEX_W;
- defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f64x_info, OpNode>,
- EVEX_V256, VEX_W;
- defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v2f64x_info, OpNode>,
- EVEX_V128, VEX_W;
+}
+ // Register-only rb variant: AVX512RC:$rc is passed to OpNode as an i32 immediate; marked EVEX_B and EVEX_RC.
+multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
+ // Instantiates the 231 form per vector width: 512-bit gets r/m/mb plus the rounding variant (HasAVX512); 256/128-bit get r/m/mb only (HasVLX + HasAVX512).
+multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
-defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
-defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
+multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > { // expands one 231 opcode into PS (f32 info, "ps" suffix) and PD (f64 info, "pd" suffix, VEX_W) families
+ defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
+}
+ // 231-form instantiations; arguments are opcode byte, mnemonic stem, SDNode, and rounding SDNode.
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
- [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
- _.RC:$src3)))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr,
- ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
- [(set _.RC:$dst,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))),
- _.RC:$src3))]>, EVEX_B;
+multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, // 132-form packed FMA: register (r), memory (m) and broadcast-memory (mb) variants.
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.RC:$src2), // $src1 is used by the pattern but not listed here; presumably AVX512_maskable_3src supplies it (TODO confirm)
+ OpcodeStr, "$src2, $src3", "$src3, $src2", // same operands spelled in both orders (presumably AT&T and Intel syntax)
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ AVX512FMA3Base;
+
+ let mayLoad = 1 in { // covers both memory variants below
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.MemOp:$src2),
+ OpcodeStr, "$src2, $src3", "$src3, $src2",
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>, // $src2 is loaded through the full-vector load fragment
+ AVX512FMA3Base;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.ScalarMemOp:$src2),
+ OpcodeStr, "${src2}"##_.BroadcastStr##", $src3", // the broadcast string is appended to the memory operand in both spellings
+ "$src3, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1,
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), // $src2 is one scalar load fed through X86VBroadcast
+ _.RC:$src3))>, AVX512FMA3Base, EVEX_B; // EVEX_B is added on this broadcast variant only
+ }
}
-} // Constraints = "$src1 = $dst"
-multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, // 132-form register variant (rb) that takes an explicit rounding-control operand.
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc), // $rc is an extra input after the two register sources
+ OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc", // $rc is printed first in one spelling and last in the other
+ (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>, // $rc reaches OpNode as a fourth, i32 immediate operand
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
-let ExeDomain = SSEPackedSingle in {
- defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode,v16f32_info>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v8f32x_info>, EVEX_V256,
- EVEX_CD8<32, CD8VF>;
- defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v4f32x_info>, EVEX_V128,
- EVEX_CD8<32, CD8VF>;
+multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode, // 132-form packed FMA for one element type: expands to the Z, Z256 and Z128 variants.
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { // OpNodeRnd is consumed only by the 512-bit defm below; _ supplies info512/info256/info128.
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>, // only the 512-bit defm also inherits the rounding-control variant
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; // CD8 element size is read from the vector info; NOTE(review): the removed PD defms below passed a literal 32 - confirm that was unintended
}
-let ExeDomain = SSEPackedDouble in {
- defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v8f64_info>, EVEX_V512,
- VEX_W, EVEX_CD8<32, CD8VF>;
- defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v4f64x_info>, EVEX_V256,
- VEX_W, EVEX_CD8<32, CD8VF>;
- defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v2f64x_info>, EVEX_V128,
- VEX_W, EVEX_CD8<32, CD8VF>;
+ let Predicates = [HasVLX, HasAVX512] in { // the 256/128-bit variants additionally require HasVLX
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
-defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
-defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
-defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
-defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
-defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
-defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
+multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode, // Instantiates the 132 form once per packed element type (PS and PD).
+ SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, // mnemonic gets the ps suffix; uses the f32 vector infos
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, // mnemonic gets the pd suffix; uses the f64 vector infos
+ avx512vl_f64_info>, VEX_W; // VEX_W is added on the PD variants only
+}
+
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; // 132-form families; arguments are opcode, mnemonic stem, SDNode, rounding SDNode
+defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
let Constraints = "$src1 = $dst" in {
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
- INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, VPERM_3OP_MASK,
+ INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
VPERM_3OP_MASKZ,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
EXPAND_FROM_MEM, BLEND
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+
X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+
+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
- X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
- X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
- X86ISD::FMADDSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
- X86ISD::FMADDSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB,
- X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB,
- X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
- X86ISD::FMSUBADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
- X86ISD::FMSUBADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
- X86ISD::FNMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
- X86ISD::FNMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
- X86ISD::FNMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
- X86ISD::FNMSUB_RND),
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s
-declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-
-define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
- ; CHECK-LABEL: test_x86_vfmsubpd_z
- ; CHECK: vfmsub213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
- ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
-
-define <8 x double> @test_mask_vfmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsub_pd
- ; CHECK: vfmsub213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
- ret <8 x double> %res
-}
+declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z
; CHECK: vfnmadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
ret <16 x float> %res
}
-declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps
; CHECK: vfnmadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z
; CHECK: vfnmadd213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
ret <8 x double> %res
}
-declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd
; CHECK: vfnmadd213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z
; CHECK: vfnmsub213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
ret <16 x float> %res
}
-declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps
; CHECK: vfnmsub213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z
; CHECK: vfnmsub213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
ret <8 x double> %res
}
-declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd
; CHECK: vfnmsub213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z
; CHECK: vfmaddsub213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
ret <16 x float> %res
}
-declare <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z
; CHECK: vfmaddsub213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
ret <8 x double> %res
}
-declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd
; CHECK: vfmaddsub213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
- ret <8 x double> %res
-}
-
-define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_x86_vfmsubaddps_z
- ; CHECK: vfmsubadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_mask_vfmsubadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubadd_ps
- ; CHECK: vfmsubadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
- ret <16 x float> %res
-}
-
-define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
- ; CHECK-LABEL: test_x86_vfmsubaddpd_z
- ; CHECK: vfmsubadd213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
-declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
-define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubadd_pd
- ; CHECK: vfmsubadd213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
- ret <8 x double> %res
+define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) ; mask from %x3, last operand 4: expected as the plain 213 form under {%k1}
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) ; all-ones mask, last operand 0: expected as the unmasked {rn-sae} form
+ %res2 = fadd <8 x double> %res, %res1 ; the returned value depends on both calls
+ ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) ; mask from %x3, last operand 4: expected as the 231 form under {%k1}
+ %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) ; all-ones mask, last operand 0: expected as the unmasked 213 form with {rn-sae}
+ %res2 = fadd <8 x double> %res, %res1 ; the returned value depends on both calls
+ ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) ; mask from %x3, last operand 4: expected as the 213 form under {%k1} {z}
+ %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) ; all-ones mask, last operand 0: expected as the unmasked {rn-sae} form
+ %res2 = fadd <8 x double> %res, %res1 ; the returned value depends on both calls
+ ret <8 x double> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) ; mask from %x3, last operand 4: expected as the plain 213 form under {%k1}
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) ; all-ones mask, last operand 0: expected as the unmasked {rn-sae} form
+ %res2 = fadd <16 x float> %res, %res1 ; the returned value depends on both calls
+ ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) ; mask from %x3, last operand 4: expected as the 231 form under {%k1}
+ %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) ; all-ones mask, last operand 0: expected as the unmasked 213 form with {rn-sae}
+ %res2 = fadd <16 x float> %res, %res1 ; the returned value depends on both calls
+ ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) ; mask from %x3, last operand 4: expected as the 213 form under {%k1} {z}
+ %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) ; all-ones mask, last operand 0: expected as the unmasked {rn-sae} form
+ %res2 = fadd <16 x float> %res, %res1 ; the returned value depends on both calls
+ ret <16 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) ; mask from %x3, last operand 4: expected as the 231 form under {%k1}
+ %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) ; all-ones mask, last operand 0: expected as the unmasked 213 form with {rn-sae}
+ %res2 = fadd <8 x double> %res, %res1 ; the returned value depends on both calls
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
ret <16 x float> %res
}
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
- ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
ret <16 x float> %res
}
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
- ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
- ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
- ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
- ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
- ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
- ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
- ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
- ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
- ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
- ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
- ret <16 x float> %res
+declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
ret <8 x double> %res
}
+define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
ret <8 x double> %res
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
ret <8 x double> %res
}
+
+define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+; 512-bit mask3 vfnmsub.pd: same two-call shape as the mask variant (variable
+; mask with trailing i32 4, all-ones mask with trailing i32 0, results added).
+; Expected code: the masked call becomes vfnmsub231pd under {%k1} writing a
+; copy of %zmm2; the unmasked call becomes vfnmsub213pd with {rn-sae}.
+define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+; 512-bit masked vfnmsub.ps with an i16 mask: one call uses %x3 and trailing
+; i32 operand 4, the other an all-ones mask (i16 -1) and trailing i32 operand
+; 0; the results are added and returned.
+; Expected code: the mask is moved straight from %edi into %k1 (no movzbl),
+; then a {%k1}-masked vfnmsub213ps and an unmasked {rn-sae} vfnmsub213ps.
+define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+; 512-bit mask3 vfnmsub.ps: variable-mask call (trailing i32 4) plus all-ones
+; call (trailing i32 0), results added.
+; Expected code: masked vfnmsub231ps under {%k1} writing a copy of %zmm2, and
+; an unmasked vfnmsub213ps with {rn-sae}.
+define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+; 512-bit masked vfnmadd.pd: called with the variable mask %x3 (trailing i32
+; 4) and with an all-ones mask (trailing i32 0); the results are added.
+; Expected code: a {%k1}-masked vfnmadd213pd writing a copy of %zmm0 and an
+; unmasked vfnmadd213pd with {rn-sae}.
+define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+; 512-bit masked vfnmadd.ps with an i16 mask: variable-mask call (trailing
+; i32 4) plus all-ones call (trailing i32 0), results added.
+; Expected code: kmovw directly from %edi, a {%k1}-masked vfnmadd213ps into a
+; copy of %zmm0, and an unmasked vfnmadd213ps with {rn-sae}.
+define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX
; CHECK-LABEL: test_x86_fmadd_ps_z
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0
ret double %res
}
-;CHECK-LABEL: test132_br
-;CHECK: vfmadd132ps LCP{{.*}}(%rip){1to16}
-;CHECK: ret
-define <16 x float> @test132_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+; %a1 is multiplied by a 16-lane splat constant and %a2 is added. The test is
+; renamed from test132_br: the expectation is now a vfmadd231ps whose memory
+; operand is a RIP-relative {1to16} broadcast, accumulating into %zmm1, with
+; the result then moved to %zmm0.
+define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+; CHECK-LABEL: test231_br:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
%b2 = fadd <16 x float> %b1, %a2
ret <16 x float> %b2
}
-;CHECK-LABEL: test213_br
-;CHECK: vfmadd213ps LCP{{.*}}(%rip){1to16}
-;CHECK: ret
+; %a1 * %a2 plus a 16-lane splat constant. The addend is the constant, so the
+; expectation is a vfmadd213ps whose memory operand is a RIP-relative
+; {1to16} broadcast, with the result left in %zmm0 (no extra move).
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+; CHECK-LABEL: test213_br:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
+; CHECK-NEXT: retq
%b1 = fmul <16 x float> %a1, %a2
%b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
ret <16 x float> %b2
}
+
+; select(mask, a*c + b, a) with a = %a0, b = %a1 and c = the vector loaded
+; (align 1) from %a2_ptrt. The pass-through value is a multiplicand, and both
+; run lines expect a {%k1}-masked vfmadd132ps that takes c directly from
+; (%rdi). The two prefixes differ only in how the <16 x i1> mask reaches
+; %k1: vpmovsxbd + vpandd + vptestmd for the default prefix, vpmovb2m for the
+; SKX prefix.
+define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
+; CHECK-LABEL: test_x86_fmadd132_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
+; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
+; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
+; CHECK-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+;
+; SKX-LABEL: test_x86_fmadd132_ps:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovb2m %xmm2, %k1
+; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
+; SKX-NEXT: retq
+ %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
+ %x = fmul <16 x float> %a0, %a2
+ %y = fadd <16 x float> %x, %a1
+ %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
+ ret <16 x float> %res
+}
+
+; select(mask, a*c + b, b) with a = %a0, b = %a1 and c = the vector loaded
+; (align 1) from %a2_ptrt. The pass-through value is the addend, and both run
+; lines expect a {%k1}-masked vfmadd231ps accumulating into %zmm1 with c read
+; from (%rdi), followed by a move of %zmm1 into %zmm0. Mask setup differs per
+; prefix exactly as in the 132 test (vptestmd sequence vs. vpmovb2m).
+define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
+; CHECK-LABEL: test_x86_fmadd231_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
+; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
+; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
+; CHECK-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+;
+; SKX-LABEL: test_x86_fmadd231_ps:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovb2m %xmm2, %k1
+; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+ %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
+ %x = fmul <16 x float> %a0, %a2
+ %y = fadd <16 x float> %x, %a1
+ %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
+ ret <16 x float> %res
+}
+
+; select(mask, b*a + c, b) with a = %a0, b = %a1 and c = the vector loaded
+; (align 1) from %a2_ptrt. The addend is the loaded vector and the
+; pass-through is the multiplicand %a1, so both run lines expect a
+; {%k1}-masked vfmadd213ps writing %zmm1 with c read from (%rdi), followed by
+; a move of %zmm1 into %zmm0.
+define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
+; CHECK-LABEL: test_x86_fmadd213_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
+; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
+; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
+; CHECK-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+;
+; SKX-LABEL: test_x86_fmadd213_ps:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovb2m %xmm2, %k1
+; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+ %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
+ %x = fmul <16 x float> %a1, %a0
+ %y = fadd <16 x float> %x, %a2
+ %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
+ ret <16 x float> %res
+}
+
declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
-declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+; Encoding test for the masked 256-bit vfmadd.ps intrinsic. Only the intrinsic
+; name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the expected
+; instruction and encoding bytes are untouched.
define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_ps
; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
- %res = call <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
-declare <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+; Encoding test for the masked 128-bit vfmadd.ps intrinsic. Only the intrinsic
+; name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the expected
+; instruction and encoding bytes are untouched.
define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
-declare <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+; Encoding test for the masked 256-bit vfmadd.pd intrinsic. Only the intrinsic
+; name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the expected
+; instruction and encoding bytes are untouched.
define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd256_pd:
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
ret <4 x double> %res
}
-declare <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+; Encoding test for the masked 128-bit vfmadd.pd intrinsic. Only the intrinsic
+; name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the expected
+; instruction and encoding bytes are untouched.
define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd128_pd:
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
ret <2 x double> %res
}
-declare <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
-
-define <8 x float> @test_mask_vfmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsub256_ps
- ; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xaa,0xc2]
- %res = call <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
- ret <8 x float> %res
-}
-
-declare <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
-define <4 x float> @test_mask_vfmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsub128_ps
- ; CHECK: vfmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaa,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
- ret <4 x float> %res
-}
-
-declare <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
-
-define <4 x double> @test_mask_vfmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsub256_pd
- ; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xaa,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
- ret <4 x double> %res
-}
-
-declare <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
-
-define <2 x double> @test_mask_vfmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsub128_pd
- ; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaa,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
- ret <2 x double> %res
-}
-
-declare <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+; 128-bit masked vfmadd.pd: one call uses the variable mask %x3, the other an
+; all-ones mask (i8 -1); the two results are added and returned.
+; Expected code: a {%k1}-masked vfmadd213pd writing a copy of register 0 and
+; an unmasked vfmadd213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+; 128-bit mask3 vfmadd.pd: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd231pd under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmadd213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+; 128-bit maskz vfmadd.pd: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd213pd with {%k1} {z} writing a
+; copy of register 0; the unmasked call is a plain vfmadd213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+; 256-bit masked vfmadd.pd: one call uses the variable mask %x3, the other an
+; all-ones mask (i8 -1); the two results are added and returned.
+; Expected code: a {%k1}-masked vfmadd213pd writing a copy of register 0 and
+; an unmasked vfmadd213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+; 256-bit mask3 vfmadd.pd: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd231pd under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmadd213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+; 256-bit maskz vfmadd.pd: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd213pd with {%k1} {z} writing a
+; copy of register 0; the unmasked call is a plain vfmadd213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+; 128-bit masked vfmadd.ps: one call uses the variable mask %x3, the other an
+; all-ones mask (i8 -1); the two results are added and returned.
+; Expected code: a {%k1}-masked vfmadd213ps writing a copy of register 0 and
+; an unmasked vfmadd213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <4 x float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+; 128-bit mask3 vfmadd.ps: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd231ps under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmadd213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+; 128-bit maskz vfmadd.ps: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd213ps with {%k1} {z} writing a
+; copy of register 0; the unmasked call is a plain vfmadd213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+; 256-bit masked vfmadd.ps: one call uses the variable mask %x3, the other an
+; all-ones mask (i8 -1); the two results are added and returned.
+; Expected code: a {%k1}-masked vfmadd213ps writing a copy of register 0 and
+; an unmasked vfmadd213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+; 256-bit mask3 vfmadd.ps: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd231ps under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmadd213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+; 256-bit maskz vfmadd.ps: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmadd213ps with {%k1} {z} writing a
+; copy of register 0; the unmasked call is a plain vfmadd213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+; 128-bit mask3 vfmsub.pd: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmsub231pd under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmsub213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+; 256-bit mask3 vfmsub.pd: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmsub231pd under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmsub213pd.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+; 128-bit mask3 vfmsub.ps: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmsub231ps under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmsub213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; xmm-wide; confirm this matches the intended llc output.
+define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+; 256-bit mask3 vfmsub.ps: variable-mask call plus all-ones (i8 -1) call,
+; results added.
+; Expected code: the masked call becomes vfmsub231ps under {%k1} writing a
+; copy of register 2; the unmasked call becomes vfmsub213ps.
+; NOTE(review): the copy is checked as a zmm-wide vmovaps although the FMA is
+; ymm-wide; confirm this matches the intended llc output.
+define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+; Encoding test for the masked 256-bit vfnmadd.ps intrinsic. The call switches
+; from llvm.x86.fma.mask.* to llvm.x86.avx512.mask.*; the expected instruction
+; and encoding bytes are untouched.
define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_ps
; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
- %res = call <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
-declare <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+; Encoding test for the masked 128-bit vfnmadd.ps intrinsic. Only the
+; intrinsic name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the
+; expected instruction and encoding bytes are untouched.
define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_ps
; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
-declare <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+; Encoding test for the masked 256-bit vfnmadd.pd intrinsic. Only the
+; intrinsic name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the
+; expected instruction and encoding bytes are untouched.
define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_pd
; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
-declare <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+; Encoding test for the masked 128-bit vfnmadd.pd intrinsic. Only the
+; intrinsic name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the
+; expected instruction and encoding bytes are untouched.
define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_pd
; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
-declare <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+; Encoding test for the masked 256-bit vfnmsub.ps intrinsic. Only the
+; intrinsic name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the
+; expected instruction and encoding bytes are untouched.
define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_ps
; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
- %res = call <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
-declare <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+; Encoding test for the masked 128-bit vfnmsub.ps intrinsic. Only the
+; intrinsic name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the
+; expected instruction and encoding bytes are untouched.
define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_ps
; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
-declare <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+; Encoding test for the masked 256-bit vfnmsub.pd intrinsic. Only the
+; intrinsic name changes (llvm.x86.fma.mask.* -> llvm.x86.avx512.mask.*); the
+; expected instruction and encoding bytes are untouched.
define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_pd
; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
-declare <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_pd
; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
-declare <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmaddsub256_ps:
; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
- %res = call <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
ret <8 x float> %res
}
-declare <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmaddsub128_ps:
; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
ret <4 x float> %res
}
-declare <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub256_pd
; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
-declare <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub128_pd
; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
-declare <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
-
-define <8 x float> @test_mask_vfmsubadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubadd256_ps
- ; CHECK: vfmsubadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa7,0xc2]
- %res = call <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
- ret <8 x float> %res
+define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2=fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2=fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2=fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2=fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
}
-declare <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
-define <4 x float> @test_mask_vfmsubadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubadd128_ps
- ; CHECK: vfmsubadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa7,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
- ret <4 x float> %res
-}
-
-declare <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
-
-define <4 x double> @test_mask_vfmsubadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubadd256_pd
- ; CHECK: vfmsubadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa7,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
- ret <4 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
-
-define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubadd128_pd
- ; CHECK: vfmsubadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubadd128rm_pd
- ; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
- %a2 = load <2 x double>, <2 x double>* %ptr_a2
- %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
- ret <2 x double> %res
-}
-declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
-define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
- ; CHECK-LABEL: test_mask_vfmsubaddrm_pd
- ; CHECK: vfmsubadd213pd (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
- %a2 = load <8 x double>, <8 x double>* %ptr_a2, align 8
- %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
- ret <8 x double> %res
-}
define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_r
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rz
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
%a2 = load <4 x float>, <4 x float>* %ptr_a2
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
%a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
%a2 = load <4 x float>, <4 x float>* %ptr_a2
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
%a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
ret <4 x float> %res
}
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
ret <4 x float> %res
}
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
ret <4 x float> %res
}
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
- %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
ret <4 x float> %res
}
define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_pd_r
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rz
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
- %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
ret <2 x double> %res
}
; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
%a2 = load <2 x double>, <2 x double>* %ptr_a2
- %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
%a2 = load <2 x double>, <2 x double>* %ptr_a2
- %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
ret <2 x double> %res
}
define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_pd_r
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rz
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
- %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
}
; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
%a2 = load <4 x double>, <4 x double>* %ptr_a2
- %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
%a2 = load <4 x double>, <4 x double>* %ptr_a2
- %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
+ %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
}
define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
// CHECK: encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff]
vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14
+// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0x92,0x5d,0x40,0x98,0xc9]
+ vfmadd132ps %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1}
+// CHECK: encoding: [0x62,0x92,0x5d,0x41,0x98,0xc9]
+ vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1}
+
+// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} {z}
+// CHECK: encoding: [0x62,0x92,0x5d,0xc1,0x98,0xc9]
+ vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} {z}
+
+// CHECK: vfmadd132ps {rn-sae}, %zmm25, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0x92,0x5d,0x10,0x98,0xc9]
+ vfmadd132ps {rn-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps {ru-sae}, %zmm25, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0x92,0x5d,0x50,0x98,0xc9]
+ vfmadd132ps {ru-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps {rd-sae}, %zmm25, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0x92,0x5d,0x30,0x98,0xc9]
+ vfmadd132ps {rd-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps {rz-sae}, %zmm25, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0x92,0x5d,0x70,0x98,0xc9]
+ vfmadd132ps {rz-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps (%rcx), %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x09]
+ vfmadd132ps (%rcx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 291(%rax,%r14,8), %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x5d,0x40,0x98,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd132ps 291(%rax,%r14,8), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps (%rcx){1to16}, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x09]
+ vfmadd132ps (%rcx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 8128(%rdx), %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x4a,0x7f]
+ vfmadd132ps 8128(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 8192(%rdx), %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x8a,0x00,0x20,0x00,0x00]
+ vfmadd132ps 8192(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -8192(%rdx), %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x4a,0x80]
+ vfmadd132ps -8192(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -8256(%rdx), %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x40,0x98,0x8a,0xc0,0xdf,0xff,0xff]
+ vfmadd132ps -8256(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 508(%rdx){1to16}, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x4a,0x7f]
+ vfmadd132ps 508(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 512(%rdx){1to16}, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x8a,0x00,0x02,0x00,0x00]
+ vfmadd132ps 512(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -512(%rdx){1to16}, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x4a,0x80]
+ vfmadd132ps -512(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -516(%rdx){1to16}, %zmm20, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x5d,0x50,0x98,0x8a,0xfc,0xfd,0xff,0xff]
+ vfmadd132ps -516(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x22,0xfd,0x40,0x98,0xd5]
+ vfmadd132pd %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5}
+// CHECK: encoding: [0x62,0x22,0xfd,0x45,0x98,0xd5]
+ vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5}
+
+// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0xfd,0xc5,0x98,0xd5]
+ vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} {z}
+
+// CHECK: vfmadd132pd {rn-sae}, %zmm21, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x22,0xfd,0x10,0x98,0xd5]
+ vfmadd132pd {rn-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd {ru-sae}, %zmm21, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x22,0xfd,0x50,0x98,0xd5]
+ vfmadd132pd {ru-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd {rd-sae}, %zmm21, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x22,0xfd,0x30,0x98,0xd5]
+ vfmadd132pd {rd-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd {rz-sae}, %zmm21, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x22,0xfd,0x70,0x98,0xd5]
+ vfmadd132pd {rz-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd (%rcx), %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x11]
+ vfmadd132pd (%rcx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 291(%rax,%r14,8), %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x22,0xfd,0x40,0x98,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd132pd 291(%rax,%r14,8), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd (%rcx){1to8}, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x11]
+ vfmadd132pd (%rcx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 8128(%rdx), %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x52,0x7f]
+ vfmadd132pd 8128(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 8192(%rdx), %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x92,0x00,0x20,0x00,0x00]
+ vfmadd132pd 8192(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -8192(%rdx), %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x52,0x80]
+ vfmadd132pd -8192(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -8256(%rdx), %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x40,0x98,0x92,0xc0,0xdf,0xff,0xff]
+ vfmadd132pd -8256(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 1016(%rdx){1to8}, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x52,0x7f]
+ vfmadd132pd 1016(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 1024(%rdx){1to8}, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x92,0x00,0x04,0x00,0x00]
+ vfmadd132pd 1024(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -1024(%rdx){1to8}, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x52,0x80]
+ vfmadd132pd -1024(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -1032(%rdx){1to8}, %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x62,0xfd,0x50,0x98,0x92,0xf8,0xfb,0xff,0xff]
+ vfmadd132pd -1032(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xc2,0x65,0x40,0xa8,0xe6]
+ vfmadd213ps %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4}
+// CHECK: encoding: [0x62,0xc2,0x65,0x44,0xa8,0xe6]
+ vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4}
+
+// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} {z}
+// CHECK: encoding: [0x62,0xc2,0x65,0xc4,0xa8,0xe6]
+ vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} {z}
+
+// CHECK: vfmadd213ps {rn-sae}, %zmm14, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xc2,0x65,0x10,0xa8,0xe6]
+ vfmadd213ps {rn-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps {ru-sae}, %zmm14, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xc2,0x65,0x50,0xa8,0xe6]
+ vfmadd213ps {ru-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps {rd-sae}, %zmm14, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xc2,0x65,0x30,0xa8,0xe6]
+ vfmadd213ps {rd-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps {rz-sae}, %zmm14, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xc2,0x65,0x70,0xa8,0xe6]
+ vfmadd213ps {rz-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps (%rcx), %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0x21]
+ vfmadd213ps (%rcx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 291(%rax,%r14,8), %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xa2,0x65,0x40,0xa8,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd213ps 291(%rax,%r14,8), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps (%rcx){1to16}, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0x21]
+ vfmadd213ps (%rcx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 8128(%rdx), %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0x62,0x7f]
+ vfmadd213ps 8128(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 8192(%rdx), %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0xa2,0x00,0x20,0x00,0x00]
+ vfmadd213ps 8192(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -8192(%rdx), %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0x62,0x80]
+ vfmadd213ps -8192(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -8256(%rdx), %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x40,0xa8,0xa2,0xc0,0xdf,0xff,0xff]
+ vfmadd213ps -8256(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 508(%rdx){1to16}, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0x62,0x7f]
+ vfmadd213ps 508(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 512(%rdx){1to16}, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0xa2,0x00,0x02,0x00,0x00]
+ vfmadd213ps 512(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -512(%rdx){1to16}, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0x62,0x80]
+ vfmadd213ps -512(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -516(%rdx){1to16}, %zmm19, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x65,0x50,0xa8,0xa2,0xfc,0xfd,0xff,0xff]
+ vfmadd213ps -516(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0x82,0xfd,0x40,0xa8,0xd1]
+ vfmadd213pd %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3}
+// CHECK: encoding: [0x62,0x82,0xfd,0x43,0xa8,0xd1]
+ vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3}
+
+// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0xc3,0xa8,0xd1]
+ vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} {z}
+
+// CHECK: vfmadd213pd {rn-sae}, %zmm25, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0x82,0xfd,0x10,0xa8,0xd1]
+ vfmadd213pd {rn-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd {ru-sae}, %zmm25, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0x82,0xfd,0x50,0xa8,0xd1]
+ vfmadd213pd {ru-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd {rd-sae}, %zmm25, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0x82,0xfd,0x30,0xa8,0xd1]
+ vfmadd213pd {rd-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd {rz-sae}, %zmm25, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0x82,0xfd,0x70,0xa8,0xd1]
+ vfmadd213pd {rz-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd (%rcx), %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x11]
+ vfmadd213pd (%rcx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 291(%rax,%r14,8), %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xa2,0xfd,0x40,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd213pd 291(%rax,%r14,8), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd (%rcx){1to8}, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x11]
+ vfmadd213pd (%rcx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 8128(%rdx), %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x52,0x7f]
+ vfmadd213pd 8128(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 8192(%rdx), %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x92,0x00,0x20,0x00,0x00]
+ vfmadd213pd 8192(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -8192(%rdx), %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x52,0x80]
+ vfmadd213pd -8192(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -8256(%rdx), %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x92,0xc0,0xdf,0xff,0xff]
+ vfmadd213pd -8256(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x52,0x7f]
+ vfmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x92,0x00,0x04,0x00,0x00]
+ vfmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x52,0x80]
+ vfmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x92,0xf8,0xfb,0xff,0xff]
+ vfmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x02,0x1d,0x40,0xb8,0xd9]
+ vfmadd231ps %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3}
+// CHECK: encoding: [0x62,0x02,0x1d,0x43,0xb8,0xd9]
+ vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3}
+
+// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0xc3,0xb8,0xd9]
+ vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x02,0x1d,0x10,0xb8,0xd9]
+ vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps {ru-sae}, %zmm25, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x02,0x1d,0x50,0xb8,0xd9]
+ vfmadd231ps {ru-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps {rd-sae}, %zmm25, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x02,0x1d,0x30,0xb8,0xd9]
+ vfmadd231ps {rd-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps {rz-sae}, %zmm25, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x02,0x1d,0x70,0xb8,0xd9]
+ vfmadd231ps {rz-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps (%rcx), %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x19]
+ vfmadd231ps (%rcx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 291(%rax,%r14,8), %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x22,0x1d,0x40,0xb8,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd231ps 291(%rax,%r14,8), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps (%rcx){1to16}, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x19]
+ vfmadd231ps (%rcx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 8128(%rdx), %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x5a,0x7f]
+ vfmadd231ps 8128(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 8192(%rdx), %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x9a,0x00,0x20,0x00,0x00]
+ vfmadd231ps 8192(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -8192(%rdx), %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x5a,0x80]
+ vfmadd231ps -8192(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -8256(%rdx), %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x40,0xb8,0x9a,0xc0,0xdf,0xff,0xff]
+ vfmadd231ps -8256(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 508(%rdx){1to16}, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x5a,0x7f]
+ vfmadd231ps 508(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 512(%rdx){1to16}, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x9a,0x00,0x02,0x00,0x00]
+ vfmadd231ps 512(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -512(%rdx){1to16}, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x5a,0x80]
+ vfmadd231ps -512(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -516(%rdx){1to16}, %zmm28, %zmm27
+// CHECK: encoding: [0x62,0x62,0x1d,0x50,0xb8,0x9a,0xfc,0xfd,0xff,0xff]
+ vfmadd231ps -516(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x42,0xcd,0x48,0xb8,0xf1]
+ vfmadd231pd %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4}
+// CHECK: encoding: [0x62,0x42,0xcd,0x4c,0xb8,0xf1]
+ vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4}
+
+// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x42,0xcd,0xcc,0xb8,0xf1]
+ vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} {z}
+
+// CHECK: vfmadd231pd {rn-sae}, %zmm9, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x42,0xcd,0x18,0xb8,0xf1]
+ vfmadd231pd {rn-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd {ru-sae}, %zmm9, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x42,0xcd,0x58,0xb8,0xf1]
+ vfmadd231pd {ru-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd {rd-sae}, %zmm9, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x42,0xcd,0x38,0xb8,0xf1]
+ vfmadd231pd {rd-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd {rz-sae}, %zmm9, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x42,0xcd,0x78,0xb8,0xf1]
+ vfmadd231pd {rz-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd (%rcx), %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0x31]
+ vfmadd231pd (%rcx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 291(%rax,%r14,8), %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x22,0xcd,0x48,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd231pd 291(%rax,%r14,8), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd (%rcx){1to8}, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0x31]
+ vfmadd231pd (%rcx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 8128(%rdx), %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0x72,0x7f]
+ vfmadd231pd 8128(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 8192(%rdx), %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0xb2,0x00,0x20,0x00,0x00]
+ vfmadd231pd 8192(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -8192(%rdx), %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0x72,0x80]
+ vfmadd231pd -8192(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -8256(%rdx), %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xb8,0xb2,0xc0,0xdf,0xff,0xff]
+ vfmadd231pd -8256(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 1016(%rdx){1to8}, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0x72,0x7f]
+ vfmadd231pd 1016(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 1024(%rdx){1to8}, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0xb2,0x00,0x04,0x00,0x00]
+ vfmadd231pd 1024(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -1024(%rdx){1to8}, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0x72,0x80]
+ vfmadd231pd -1024(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -1032(%rdx){1to8}, %zmm6, %zmm30
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xb8,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmadd231pd -1032(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x15,0x48,0x9a,0xc8]
+ vfmsub132ps %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4}
+// CHECK: encoding: [0x62,0xb2,0x15,0x4c,0x9a,0xc8]
+ vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4}
+
+// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} {z}
+// CHECK: encoding: [0x62,0xb2,0x15,0xcc,0x9a,0xc8]
+ vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} {z}
+
+// CHECK: vfmsub132ps {rn-sae}, %zmm16, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x15,0x18,0x9a,0xc8]
+ vfmsub132ps {rn-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps {ru-sae}, %zmm16, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x15,0x58,0x9a,0xc8]
+ vfmsub132ps {ru-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps {rd-sae}, %zmm16, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x15,0x38,0x9a,0xc8]
+ vfmsub132ps {rd-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps {rz-sae}, %zmm16, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x15,0x78,0x9a,0xc8]
+ vfmsub132ps {rz-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps (%rcx), %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x09]
+ vfmsub132ps (%rcx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 291(%rax,%r14,8), %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x15,0x48,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub132ps 291(%rax,%r14,8), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps (%rcx){1to16}, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x09]
+ vfmsub132ps (%rcx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 8128(%rdx), %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x4a,0x7f]
+ vfmsub132ps 8128(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 8192(%rdx), %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x8a,0x00,0x20,0x00,0x00]
+ vfmsub132ps 8192(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -8192(%rdx), %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x4a,0x80]
+ vfmsub132ps -8192(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -8256(%rdx), %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x9a,0x8a,0xc0,0xdf,0xff,0xff]
+ vfmsub132ps -8256(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 508(%rdx){1to16}, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x4a,0x7f]
+ vfmsub132ps 508(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 512(%rdx){1to16}, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x8a,0x00,0x02,0x00,0x00]
+ vfmsub132ps 512(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -512(%rdx){1to16}, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x4a,0x80]
+ vfmsub132ps -512(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -516(%rdx){1to16}, %zmm13, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x9a,0x8a,0xfc,0xfd,0xff,0xff]
+ vfmsub132ps -516(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0x82,0x9d,0x48,0x9a,0xf3]
+ vfmsub132pd %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2}
+// CHECK: encoding: [0x62,0x82,0x9d,0x4a,0x9a,0xf3]
+ vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2}
+
+// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x9d,0xca,0x9a,0xf3]
+ vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2} {z}
+
+// CHECK: vfmsub132pd {rn-sae}, %zmm27, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0x82,0x9d,0x18,0x9a,0xf3]
+ vfmsub132pd {rn-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd {ru-sae}, %zmm27, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0x82,0x9d,0x58,0x9a,0xf3]
+ vfmsub132pd {ru-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd {rd-sae}, %zmm27, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0x82,0x9d,0x38,0x9a,0xf3]
+ vfmsub132pd {rd-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd {rz-sae}, %zmm27, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0x82,0x9d,0x78,0x9a,0xf3]
+ vfmsub132pd {rz-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd (%rcx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x31]
+ vfmsub132pd (%rcx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 291(%rax,%r14,8), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x9d,0x48,0x9a,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub132pd 291(%rax,%r14,8), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd (%rcx){1to8}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x31]
+ vfmsub132pd (%rcx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 8128(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x72,0x7f]
+ vfmsub132pd 8128(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 8192(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0xb2,0x00,0x20,0x00,0x00]
+ vfmsub132pd 8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -8192(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x72,0x80]
+ vfmsub132pd -8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -8256(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x48,0x9a,0xb2,0xc0,0xdf,0xff,0xff]
+ vfmsub132pd -8256(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 1016(%rdx){1to8}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x72,0x7f]
+ vfmsub132pd 1016(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 1024(%rdx){1to8}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0xb2,0x00,0x04,0x00,0x00]
+ vfmsub132pd 1024(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -1024(%rdx){1to8}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x72,0x80]
+ vfmsub132pd -1024(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -1032(%rdx){1to8}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x58,0x9a,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmsub132pd -1032(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x4d,0x40,0xaa,0xf2]
+ vfmsub213ps %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6}
+// CHECK: encoding: [0x62,0xc2,0x4d,0x46,0xaa,0xf2]
+ vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6}
+
+// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} {z}
+// CHECK: encoding: [0x62,0xc2,0x4d,0xc6,0xaa,0xf2]
+ vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} {z}
+
+// CHECK: vfmsub213ps {rn-sae}, %zmm10, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x4d,0x10,0xaa,0xf2]
+ vfmsub213ps {rn-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps {ru-sae}, %zmm10, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x4d,0x50,0xaa,0xf2]
+ vfmsub213ps {ru-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps {rd-sae}, %zmm10, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x4d,0x30,0xaa,0xf2]
+ vfmsub213ps {rd-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps {rz-sae}, %zmm10, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x4d,0x70,0xaa,0xf2]
+ vfmsub213ps {rz-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps (%rcx), %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x31]
+ vfmsub213ps (%rcx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 291(%rax,%r14,8), %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0xaa,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub213ps 291(%rax,%r14,8), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps (%rcx){1to16}, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x31]
+ vfmsub213ps (%rcx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 8128(%rdx), %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x72,0x7f]
+ vfmsub213ps 8128(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 8192(%rdx), %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0xb2,0x00,0x20,0x00,0x00]
+ vfmsub213ps 8192(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -8192(%rdx), %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x72,0x80]
+ vfmsub213ps -8192(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -8256(%rdx), %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0xaa,0xb2,0xc0,0xdf,0xff,0xff]
+ vfmsub213ps -8256(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 508(%rdx){1to16}, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x72,0x7f]
+ vfmsub213ps 508(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 512(%rdx){1to16}, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0xb2,0x00,0x02,0x00,0x00]
+ vfmsub213ps 512(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -512(%rdx){1to16}, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x72,0x80]
+ vfmsub213ps -512(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -516(%rdx){1to16}, %zmm22, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x4d,0x50,0xaa,0xb2,0xfc,0xfd,0xff,0xff]
+ vfmsub213ps -516(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0xec]
+ vfmsub213pd %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1}
+// CHECK: encoding: [0x62,0xf2,0xad,0x49,0xaa,0xec]
+ vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1}
+
+// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} {z}
+// CHECK: encoding: [0x62,0xf2,0xad,0xc9,0xaa,0xec]
+ vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} {z}
+
+// CHECK: vfmsub213pd {rn-sae}, %zmm4, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x18,0xaa,0xec]
+ vfmsub213pd {rn-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd {ru-sae}, %zmm4, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0xec]
+ vfmsub213pd {ru-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd {rd-sae}, %zmm4, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x38,0xaa,0xec]
+ vfmsub213pd {rd-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd {rz-sae}, %zmm4, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x78,0xaa,0xec]
+ vfmsub213pd {rz-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd (%rcx), %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0x29]
+ vfmsub213pd (%rcx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 291(%rax,%r14,8), %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xb2,0xad,0x48,0xaa,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub213pd 291(%rax,%r14,8), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd (%rcx){1to8}, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0x29]
+ vfmsub213pd (%rcx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 8128(%rdx), %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0x6a,0x7f]
+ vfmsub213pd 8128(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 8192(%rdx), %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0xaa,0x00,0x20,0x00,0x00]
+ vfmsub213pd 8192(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -8192(%rdx), %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0x6a,0x80]
+ vfmsub213pd -8192(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -8256(%rdx), %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x48,0xaa,0xaa,0xc0,0xdf,0xff,0xff]
+ vfmsub213pd -8256(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 1016(%rdx){1to8}, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0x6a,0x7f]
+ vfmsub213pd 1016(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 1024(%rdx){1to8}, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0xaa,0x00,0x04,0x00,0x00]
+ vfmsub213pd 1024(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -1024(%rdx){1to8}, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0x6a,0x80]
+ vfmsub213pd -1024(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -1032(%rdx){1to8}, %zmm10, %zmm5
+// CHECK: encoding: [0x62,0xf2,0xad,0x58,0xaa,0xaa,0xf8,0xfb,0xff,0xff]
+ vfmsub213pd -1032(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0x92,0x55,0x40,0xba,0xf3]
+ vfmsub231ps %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3}
+// CHECK: encoding: [0x62,0x92,0x55,0x43,0xba,0xf3]
+ vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3}
+
+// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} {z}
+// CHECK: encoding: [0x62,0x92,0x55,0xc3,0xba,0xf3]
+ vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} {z}
+
+// CHECK: vfmsub231ps {rn-sae}, %zmm27, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0x92,0x55,0x10,0xba,0xf3]
+ vfmsub231ps {rn-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps {ru-sae}, %zmm27, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0x92,0x55,0x50,0xba,0xf3]
+ vfmsub231ps {ru-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps {rd-sae}, %zmm27, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0x92,0x55,0x30,0xba,0xf3]
+ vfmsub231ps {rd-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps {rz-sae}, %zmm27, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0x92,0x55,0x70,0xba,0xf3]
+ vfmsub231ps {rz-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps (%rcx), %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0x31]
+ vfmsub231ps (%rcx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 291(%rax,%r14,8), %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xb2,0x55,0x40,0xba,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub231ps 291(%rax,%r14,8), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps (%rcx){1to16}, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0x31]
+ vfmsub231ps (%rcx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 8128(%rdx), %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0x72,0x7f]
+ vfmsub231ps 8128(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 8192(%rdx), %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0xb2,0x00,0x20,0x00,0x00]
+ vfmsub231ps 8192(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -8192(%rdx), %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0x72,0x80]
+ vfmsub231ps -8192(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -8256(%rdx), %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x40,0xba,0xb2,0xc0,0xdf,0xff,0xff]
+ vfmsub231ps -8256(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 508(%rdx){1to16}, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0x72,0x7f]
+ vfmsub231ps 508(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 512(%rdx){1to16}, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0xb2,0x00,0x02,0x00,0x00]
+ vfmsub231ps 512(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -512(%rdx){1to16}, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0x72,0x80]
+ vfmsub231ps -512(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -516(%rdx){1to16}, %zmm21, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x55,0x50,0xba,0xb2,0xfc,0xfd,0xff,0xff]
+ vfmsub231ps -516(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xd2,0x9d,0x48,0xba,0xeb]
+ vfmsub231pd %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2}
+// CHECK: encoding: [0x62,0xd2,0x9d,0x4a,0xba,0xeb]
+ vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2}
+
+// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} {z}
+// CHECK: encoding: [0x62,0xd2,0x9d,0xca,0xba,0xeb]
+ vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} {z}
+
+// CHECK: vfmsub231pd {rn-sae}, %zmm11, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xd2,0x9d,0x18,0xba,0xeb]
+ vfmsub231pd {rn-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd {ru-sae}, %zmm11, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xd2,0x9d,0x58,0xba,0xeb]
+ vfmsub231pd {ru-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd {rd-sae}, %zmm11, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xd2,0x9d,0x38,0xba,0xeb]
+ vfmsub231pd {rd-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd {rz-sae}, %zmm11, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xd2,0x9d,0x78,0xba,0xeb]
+ vfmsub231pd {rz-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd (%rcx), %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0x29]
+ vfmsub231pd (%rcx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 291(%rax,%r14,8), %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xb2,0x9d,0x48,0xba,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub231pd 291(%rax,%r14,8), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd (%rcx){1to8}, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0x29]
+ vfmsub231pd (%rcx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 8128(%rdx), %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0x6a,0x7f]
+ vfmsub231pd 8128(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 8192(%rdx), %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0xaa,0x00,0x20,0x00,0x00]
+ vfmsub231pd 8192(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -8192(%rdx), %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0x6a,0x80]
+ vfmsub231pd -8192(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -8256(%rdx), %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x48,0xba,0xaa,0xc0,0xdf,0xff,0xff]
+ vfmsub231pd -8256(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 1016(%rdx){1to8}, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0x6a,0x7f]
+ vfmsub231pd 1016(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 1024(%rdx){1to8}, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0xaa,0x00,0x04,0x00,0x00]
+ vfmsub231pd 1024(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -1024(%rdx){1to8}, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0x6a,0x80]
+ vfmsub231pd -1024(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -1032(%rdx){1to8}, %zmm12, %zmm5
+// CHECK: encoding: [0x62,0xf2,0x9d,0x58,0xba,0xaa,0xf8,0xfb,0xff,0xff]
+ vfmsub231pd -1032(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x32,0x35,0x48,0x96,0xd4]
+ vfmaddsub132ps %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3}
+// CHECK: encoding: [0x62,0x32,0x35,0x4b,0x96,0xd4]
+ vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3}
+
+// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} {z}
+// CHECK: encoding: [0x62,0x32,0x35,0xcb,0x96,0xd4]
+ vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} {z}
+
+// CHECK: vfmaddsub132ps {rn-sae}, %zmm20, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x32,0x35,0x18,0x96,0xd4]
+ vfmaddsub132ps {rn-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps {ru-sae}, %zmm20, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x32,0x35,0x58,0x96,0xd4]
+ vfmaddsub132ps {ru-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps {rd-sae}, %zmm20, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x32,0x35,0x38,0x96,0xd4]
+ vfmaddsub132ps {rd-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps {rz-sae}, %zmm20, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x32,0x35,0x78,0x96,0xd4]
+ vfmaddsub132ps {rz-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps (%rcx), %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x11]
+ vfmaddsub132ps (%rcx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x32,0x35,0x48,0x96,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub132ps 291(%rax,%r14,8), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps (%rcx){1to16}, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x11]
+ vfmaddsub132ps (%rcx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 8128(%rdx), %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x52,0x7f]
+ vfmaddsub132ps 8128(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 8192(%rdx), %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x92,0x00,0x20,0x00,0x00]
+ vfmaddsub132ps 8192(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -8192(%rdx), %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x52,0x80]
+ vfmaddsub132ps -8192(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -8256(%rdx), %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x48,0x96,0x92,0xc0,0xdf,0xff,0xff]
+ vfmaddsub132ps -8256(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 508(%rdx){1to16}, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x52,0x7f]
+ vfmaddsub132ps 508(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 512(%rdx){1to16}, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x92,0x00,0x02,0x00,0x00]
+ vfmaddsub132ps 512(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -512(%rdx){1to16}, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x52,0x80]
+ vfmaddsub132ps -512(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -516(%rdx){1to16}, %zmm9, %zmm10
+// CHECK: encoding: [0x62,0x72,0x35,0x58,0x96,0x92,0xfc,0xfd,0xff,0xff]
+ vfmaddsub132ps -516(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xa2,0xb5,0x40,0x96,0xe5]
+ vfmaddsub132pd %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xb5,0x42,0x96,0xe5]
+ vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2}
+
+// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0xb5,0xc2,0x96,0xe5]
+ vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} {z}
+
+// CHECK: vfmaddsub132pd {rn-sae}, %zmm21, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xa2,0xb5,0x10,0x96,0xe5]
+ vfmaddsub132pd {rn-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd {ru-sae}, %zmm21, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xa2,0xb5,0x50,0x96,0xe5]
+ vfmaddsub132pd {ru-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd {rd-sae}, %zmm21, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xa2,0xb5,0x30,0x96,0xe5]
+ vfmaddsub132pd {rd-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd {rz-sae}, %zmm21, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xa2,0xb5,0x70,0x96,0xe5]
+ vfmaddsub132pd {rz-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd (%rcx), %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0x21]
+ vfmaddsub132pd (%rcx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xa2,0xb5,0x40,0x96,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub132pd 291(%rax,%r14,8), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd (%rcx){1to8}, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0x21]
+ vfmaddsub132pd (%rcx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 8128(%rdx), %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0x62,0x7f]
+ vfmaddsub132pd 8128(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 8192(%rdx), %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0xa2,0x00,0x20,0x00,0x00]
+ vfmaddsub132pd 8192(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -8192(%rdx), %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0x62,0x80]
+ vfmaddsub132pd -8192(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -8256(%rdx), %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x40,0x96,0xa2,0xc0,0xdf,0xff,0xff]
+ vfmaddsub132pd -8256(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 1016(%rdx){1to8}, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0x62,0x7f]
+ vfmaddsub132pd 1016(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 1024(%rdx){1to8}, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0xa2,0x00,0x04,0x00,0x00]
+ vfmaddsub132pd 1024(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -1024(%rdx){1to8}, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0x62,0x80]
+ vfmaddsub132pd -1024(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -1032(%rdx){1to8}, %zmm25, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xb5,0x50,0x96,0xa2,0xf8,0xfb,0xff,0xff]
+ vfmaddsub132pd -1032(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0x82,0x3d,0x40,0xa6,0xcc]
+ vfmaddsub213ps %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6}
+// CHECK: encoding: [0x62,0x82,0x3d,0x46,0xa6,0xcc]
+ vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6}
+
+// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0x3d,0xc6,0xa6,0xcc]
+ vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} {z}
+
+// CHECK: vfmaddsub213ps {rn-sae}, %zmm28, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0x82,0x3d,0x10,0xa6,0xcc]
+ vfmaddsub213ps {rn-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps {ru-sae}, %zmm28, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0x82,0x3d,0x50,0xa6,0xcc]
+ vfmaddsub213ps {ru-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps {rd-sae}, %zmm28, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0x82,0x3d,0x30,0xa6,0xcc]
+ vfmaddsub213ps {rd-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps {rz-sae}, %zmm28, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0x82,0x3d,0x70,0xa6,0xcc]
+ vfmaddsub213ps {rz-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps (%rcx), %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x09]
+ vfmaddsub213ps (%rcx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xa2,0x3d,0x40,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub213ps 291(%rax,%r14,8), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps (%rcx){1to16}, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x09]
+ vfmaddsub213ps (%rcx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 8128(%rdx), %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x4a,0x7f]
+ vfmaddsub213ps 8128(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 8192(%rdx), %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x8a,0x00,0x20,0x00,0x00]
+ vfmaddsub213ps 8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -8192(%rdx), %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x4a,0x80]
+ vfmaddsub213ps -8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -8256(%rdx), %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x8a,0xc0,0xdf,0xff,0xff]
+ vfmaddsub213ps -8256(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 508(%rdx){1to16}, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x4a,0x7f]
+ vfmaddsub213ps 508(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 512(%rdx){1to16}, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x8a,0x00,0x02,0x00,0x00]
+ vfmaddsub213ps 512(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -512(%rdx){1to16}, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x4a,0x80]
+ vfmaddsub213ps -512(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -516(%rdx){1to16}, %zmm24, %zmm17
+// CHECK: encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x8a,0xfc,0xfd,0xff,0xff]
+ vfmaddsub213ps -516(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x42,0xcd,0x48,0xa6,0xd2]
+ vfmaddsub213pd %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6}
+// CHECK: encoding: [0x62,0x42,0xcd,0x4e,0xa6,0xd2]
+ vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6}
+
+// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x42,0xcd,0xce,0xa6,0xd2]
+ vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} {z}
+
+// CHECK: vfmaddsub213pd {rn-sae}, %zmm10, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x42,0xcd,0x18,0xa6,0xd2]
+ vfmaddsub213pd {rn-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd {ru-sae}, %zmm10, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x42,0xcd,0x58,0xa6,0xd2]
+ vfmaddsub213pd {ru-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd {rd-sae}, %zmm10, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x42,0xcd,0x38,0xa6,0xd2]
+ vfmaddsub213pd {rd-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd {rz-sae}, %zmm10, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x42,0xcd,0x78,0xa6,0xd2]
+ vfmaddsub213pd {rz-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd (%rcx), %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x11]
+ vfmaddsub213pd (%rcx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x22,0xcd,0x48,0xa6,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub213pd 291(%rax,%r14,8), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd (%rcx){1to8}, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x11]
+ vfmaddsub213pd (%rcx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 8128(%rdx), %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x52,0x7f]
+ vfmaddsub213pd 8128(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 8192(%rdx), %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x92,0x00,0x20,0x00,0x00]
+ vfmaddsub213pd 8192(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -8192(%rdx), %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x52,0x80]
+ vfmaddsub213pd -8192(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -8256(%rdx), %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x48,0xa6,0x92,0xc0,0xdf,0xff,0xff]
+ vfmaddsub213pd -8256(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 1016(%rdx){1to8}, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x52,0x7f]
+ vfmaddsub213pd 1016(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 1024(%rdx){1to8}, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x92,0x00,0x04,0x00,0x00]
+ vfmaddsub213pd 1024(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -1024(%rdx){1to8}, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x52,0x80]
+ vfmaddsub213pd -1024(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -1032(%rdx){1to8}, %zmm6, %zmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x58,0xa6,0x92,0xf8,0xfb,0xff,0xff]
+ vfmaddsub213pd -1032(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x32,0x55,0x40,0xb6,0xfb]
+ vfmaddsub231ps %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6}
+// CHECK: encoding: [0x62,0x32,0x55,0x46,0xb6,0xfb]
+ vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6}
+
+// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} {z}
+// CHECK: encoding: [0x62,0x32,0x55,0xc6,0xb6,0xfb]
+ vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} {z}
+
+// CHECK: vfmaddsub231ps {rn-sae}, %zmm19, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x32,0x55,0x10,0xb6,0xfb]
+ vfmaddsub231ps {rn-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps {ru-sae}, %zmm19, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x32,0x55,0x50,0xb6,0xfb]
+ vfmaddsub231ps {ru-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps {rd-sae}, %zmm19, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x32,0x55,0x30,0xb6,0xfb]
+ vfmaddsub231ps {rd-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps {rz-sae}, %zmm19, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x32,0x55,0x70,0xb6,0xfb]
+ vfmaddsub231ps {rz-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps (%rcx), %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0x39]
+ vfmaddsub231ps (%rcx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x32,0x55,0x40,0xb6,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub231ps 291(%rax,%r14,8), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps (%rcx){1to16}, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0x39]
+ vfmaddsub231ps (%rcx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 8128(%rdx), %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0x7a,0x7f]
+ vfmaddsub231ps 8128(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 8192(%rdx), %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0xba,0x00,0x20,0x00,0x00]
+ vfmaddsub231ps 8192(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -8192(%rdx), %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0x7a,0x80]
+ vfmaddsub231ps -8192(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -8256(%rdx), %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x40,0xb6,0xba,0xc0,0xdf,0xff,0xff]
+ vfmaddsub231ps -8256(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 508(%rdx){1to16}, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0x7a,0x7f]
+ vfmaddsub231ps 508(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 512(%rdx){1to16}, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0xba,0x00,0x02,0x00,0x00]
+ vfmaddsub231ps 512(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -512(%rdx){1to16}, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0x7a,0x80]
+ vfmaddsub231ps -512(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -516(%rdx){1to16}, %zmm21, %zmm15
+// CHECK: encoding: [0x62,0x72,0x55,0x50,0xb6,0xba,0xfc,0xfd,0xff,0xff]
+ vfmaddsub231ps -516(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x12,0xa5,0x40,0xb6,0xc8]
+ vfmaddsub231pd %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7}
+// CHECK: encoding: [0x62,0x12,0xa5,0x47,0xb6,0xc8]
+ vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7}
+
+// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} {z}
+// CHECK: encoding: [0x62,0x12,0xa5,0xc7,0xb6,0xc8]
+ vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} {z}
+
+// CHECK: vfmaddsub231pd {rn-sae}, %zmm24, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x12,0xa5,0x10,0xb6,0xc8]
+ vfmaddsub231pd {rn-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd {ru-sae}, %zmm24, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x12,0xa5,0x50,0xb6,0xc8]
+ vfmaddsub231pd {ru-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd {rd-sae}, %zmm24, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x12,0xa5,0x30,0xb6,0xc8]
+ vfmaddsub231pd {rd-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd {rz-sae}, %zmm24, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x12,0xa5,0x70,0xb6,0xc8]
+ vfmaddsub231pd {rz-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd (%rcx), %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x09]
+ vfmaddsub231pd (%rcx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x32,0xa5,0x40,0xb6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub231pd 291(%rax,%r14,8), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd (%rcx){1to8}, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x09]
+ vfmaddsub231pd (%rcx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 8128(%rdx), %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x4a,0x7f]
+ vfmaddsub231pd 8128(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 8192(%rdx), %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x8a,0x00,0x20,0x00,0x00]
+ vfmaddsub231pd 8192(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -8192(%rdx), %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x4a,0x80]
+ vfmaddsub231pd -8192(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -8256(%rdx), %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x40,0xb6,0x8a,0xc0,0xdf,0xff,0xff]
+ vfmaddsub231pd -8256(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 1016(%rdx){1to8}, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x4a,0x7f]
+ vfmaddsub231pd 1016(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 1024(%rdx){1to8}, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x8a,0x00,0x04,0x00,0x00]
+ vfmaddsub231pd 1024(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -1024(%rdx){1to8}, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x4a,0x80]
+ vfmaddsub231pd -1024(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -1032(%rdx){1to8}, %zmm27, %zmm9
+// CHECK: encoding: [0x62,0x72,0xa5,0x50,0xb6,0x8a,0xf8,0xfb,0xff,0xff]
+ vfmaddsub231pd -1032(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xb2,0x15,0x48,0x97,0xd5]
+ vfmsubadd132ps %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xb2,0x15,0x4f,0x97,0xd5]
+ vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7}
+
+// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xb2,0x15,0xcf,0x97,0xd5]
+ vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} {z}
+
+// CHECK: vfmsubadd132ps {rn-sae}, %zmm21, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xb2,0x15,0x18,0x97,0xd5]
+ vfmsubadd132ps {rn-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps {ru-sae}, %zmm21, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xb2,0x15,0x58,0x97,0xd5]
+ vfmsubadd132ps {ru-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps {rd-sae}, %zmm21, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xb2,0x15,0x38,0x97,0xd5]
+ vfmsubadd132ps {rd-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps {rz-sae}, %zmm21, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xb2,0x15,0x78,0x97,0xd5]
+ vfmsubadd132ps {rz-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps (%rcx), %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x11]
+ vfmsubadd132ps (%rcx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xb2,0x15,0x48,0x97,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd132ps 291(%rax,%r14,8), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps (%rcx){1to16}, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x11]
+ vfmsubadd132ps (%rcx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 8128(%rdx), %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x52,0x7f]
+ vfmsubadd132ps 8128(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 8192(%rdx), %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x92,0x00,0x20,0x00,0x00]
+ vfmsubadd132ps 8192(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -8192(%rdx), %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x52,0x80]
+ vfmsubadd132ps -8192(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -8256(%rdx), %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x48,0x97,0x92,0xc0,0xdf,0xff,0xff]
+ vfmsubadd132ps -8256(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 508(%rdx){1to16}, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x52,0x7f]
+ vfmsubadd132ps 508(%rdx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 512(%rdx){1to16}, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x92,0x00,0x02,0x00,0x00]
+ vfmsubadd132ps 512(%rdx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -512(%rdx){1to16}, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x52,0x80]
+ vfmsubadd132ps -512(%rdx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -516(%rdx){1to16}, %zmm13, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x15,0x58,0x97,0x92,0xfc,0xfd,0xff,0xff]
+ vfmsubadd132ps -516(%rdx){1to16}, %zmm13, %zmm2
+// vfmsubadd132pd on zmm: register form (plain, {%k7}, {%k7} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to8} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x9d,0x40,0x97,0xea]
+ vfmsubadd132pd %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x9d,0x47,0x97,0xea]
+ vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7}
+
+// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x9d,0xc7,0x97,0xea]
+ vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} {z}
+
+// CHECK: vfmsubadd132pd {rn-sae}, %zmm18, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x9d,0x10,0x97,0xea]
+ vfmsubadd132pd {rn-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd {ru-sae}, %zmm18, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x9d,0x50,0x97,0xea]
+ vfmsubadd132pd {ru-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd {rd-sae}, %zmm18, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x9d,0x30,0x97,0xea]
+ vfmsubadd132pd {rd-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd {rz-sae}, %zmm18, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x9d,0x70,0x97,0xea]
+ vfmsubadd132pd {rz-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd (%rcx), %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0x29]
+ vfmsubadd132pd (%rcx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x9d,0x40,0x97,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd132pd 291(%rax,%r14,8), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd (%rcx){1to8}, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0x29]
+ vfmsubadd132pd (%rcx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 8128(%rdx), %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0x6a,0x7f]
+ vfmsubadd132pd 8128(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 8192(%rdx), %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0xaa,0x00,0x20,0x00,0x00]
+ vfmsubadd132pd 8192(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -8192(%rdx), %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0x6a,0x80]
+ vfmsubadd132pd -8192(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -8256(%rdx), %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x40,0x97,0xaa,0xc0,0xdf,0xff,0xff]
+ vfmsubadd132pd -8256(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 1016(%rdx){1to8}, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0x6a,0x7f]
+ vfmsubadd132pd 1016(%rdx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 1024(%rdx){1to8}, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0xaa,0x00,0x04,0x00,0x00]
+ vfmsubadd132pd 1024(%rdx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -1024(%rdx){1to8}, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0x6a,0x80]
+ vfmsubadd132pd -1024(%rdx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -1032(%rdx){1to8}, %zmm28, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x9d,0x50,0x97,0xaa,0xf8,0xfb,0xff,0xff]
+ vfmsubadd132pd -1032(%rdx){1to8}, %zmm28, %zmm21
+// vfmsubadd213ps on zmm: register form (plain, {%k6}, {%k6} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to16} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x1d,0x48,0xa7,0xf6]
+ vfmsubadd213ps %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6}
+// CHECK: encoding: [0x62,0xc2,0x1d,0x4e,0xa7,0xf6]
+ vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6}
+
+// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6} {z}
+// CHECK: encoding: [0x62,0xc2,0x1d,0xce,0xa7,0xf6]
+ vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6} {z}
+
+// CHECK: vfmsubadd213ps {rn-sae}, %zmm14, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x1d,0x18,0xa7,0xf6]
+ vfmsubadd213ps {rn-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps {ru-sae}, %zmm14, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x1d,0x58,0xa7,0xf6]
+ vfmsubadd213ps {ru-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps {rd-sae}, %zmm14, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x1d,0x38,0xa7,0xf6]
+ vfmsubadd213ps {rd-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps {rz-sae}, %zmm14, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x1d,0x78,0xa7,0xf6]
+ vfmsubadd213ps {rz-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps (%rcx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x31]
+ vfmsubadd213ps (%rcx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x1d,0x48,0xa7,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd213ps 291(%rax,%r14,8), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps (%rcx){1to16}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x31]
+ vfmsubadd213ps (%rcx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 8128(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x72,0x7f]
+ vfmsubadd213ps 8128(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 8192(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0xb2,0x00,0x20,0x00,0x00]
+ vfmsubadd213ps 8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -8192(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x72,0x80]
+ vfmsubadd213ps -8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -8256(%rdx), %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x48,0xa7,0xb2,0xc0,0xdf,0xff,0xff]
+ vfmsubadd213ps -8256(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 508(%rdx){1to16}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x72,0x7f]
+ vfmsubadd213ps 508(%rdx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 512(%rdx){1to16}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0xb2,0x00,0x02,0x00,0x00]
+ vfmsubadd213ps 512(%rdx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -512(%rdx){1to16}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x72,0x80]
+ vfmsubadd213ps -512(%rdx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -516(%rdx){1to16}, %zmm12, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x58,0xa7,0xb2,0xfc,0xfd,0xff,0xff]
+ vfmsubadd213ps -516(%rdx){1to16}, %zmm12, %zmm22
+// vfmsubadd213pd on zmm: register form (plain, {%k6}, {%k6} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to8} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0xd2]
+ vfmsubadd213pd %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6}
+// CHECK: encoding: [0x62,0xf2,0xc5,0x46,0xa7,0xd2]
+ vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6}
+
+// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} {z}
+// CHECK: encoding: [0x62,0xf2,0xc5,0xc6,0xa7,0xd2]
+ vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} {z}
+
+// CHECK: vfmsubadd213pd {rn-sae}, %zmm2, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x10,0xa7,0xd2]
+ vfmsubadd213pd {rn-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd {ru-sae}, %zmm2, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0xd2]
+ vfmsubadd213pd {ru-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd {rd-sae}, %zmm2, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x30,0xa7,0xd2]
+ vfmsubadd213pd {rd-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd {rz-sae}, %zmm2, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x70,0xa7,0xd2]
+ vfmsubadd213pd {rz-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd (%rcx), %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x11]
+ vfmsubadd213pd (%rcx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xb2,0xc5,0x40,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd213pd 291(%rax,%r14,8), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd (%rcx){1to8}, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x11]
+ vfmsubadd213pd (%rcx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 8128(%rdx), %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x52,0x7f]
+ vfmsubadd213pd 8128(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 8192(%rdx), %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x92,0x00,0x20,0x00,0x00]
+ vfmsubadd213pd 8192(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -8192(%rdx), %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x52,0x80]
+ vfmsubadd213pd -8192(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -8256(%rdx), %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x92,0xc0,0xdf,0xff,0xff]
+ vfmsubadd213pd -8256(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 1016(%rdx){1to8}, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x52,0x7f]
+ vfmsubadd213pd 1016(%rdx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 1024(%rdx){1to8}, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x92,0x00,0x04,0x00,0x00]
+ vfmsubadd213pd 1024(%rdx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -1024(%rdx){1to8}, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x52,0x80]
+ vfmsubadd213pd -1024(%rdx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -1032(%rdx){1to8}, %zmm23, %zmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x92,0xf8,0xfb,0xff,0xff]
+ vfmsubadd213pd -1032(%rdx){1to8}, %zmm23, %zmm2
+// vfmsubadd231ps on zmm: register form (plain, {%k2}, {%k2} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to16} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0xc1]
+ vfmsubadd231ps %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2}
+// CHECK: encoding: [0x62,0x72,0x65,0x42,0xb7,0xc1]
+ vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2}
+
+// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} {z}
+// CHECK: encoding: [0x62,0x72,0x65,0xc2,0xb7,0xc1]
+ vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} {z}
+
+// CHECK: vfmsubadd231ps {rn-sae}, %zmm1, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x10,0xb7,0xc1]
+ vfmsubadd231ps {rn-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps {ru-sae}, %zmm1, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0xc1]
+ vfmsubadd231ps {ru-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps {rd-sae}, %zmm1, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x30,0xb7,0xc1]
+ vfmsubadd231ps {rd-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps {rz-sae}, %zmm1, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x70,0xb7,0xc1]
+ vfmsubadd231ps {rz-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps (%rcx), %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x01]
+ vfmsubadd231ps (%rcx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x32,0x65,0x40,0xb7,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd231ps 291(%rax,%r14,8), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps (%rcx){1to16}, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x01]
+ vfmsubadd231ps (%rcx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 8128(%rdx), %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x42,0x7f]
+ vfmsubadd231ps 8128(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 8192(%rdx), %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x82,0x00,0x20,0x00,0x00]
+ vfmsubadd231ps 8192(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -8192(%rdx), %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x42,0x80]
+ vfmsubadd231ps -8192(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -8256(%rdx), %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x40,0xb7,0x82,0xc0,0xdf,0xff,0xff]
+ vfmsubadd231ps -8256(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 508(%rdx){1to16}, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x42,0x7f]
+ vfmsubadd231ps 508(%rdx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 512(%rdx){1to16}, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x82,0x00,0x02,0x00,0x00]
+ vfmsubadd231ps 512(%rdx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -512(%rdx){1to16}, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x42,0x80]
+ vfmsubadd231ps -512(%rdx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -516(%rdx){1to16}, %zmm19, %zmm8
+// CHECK: encoding: [0x62,0x72,0x65,0x50,0xb7,0x82,0xfc,0xfd,0xff,0xff]
+ vfmsubadd231ps -516(%rdx){1to16}, %zmm19, %zmm8
+// vfmsubadd231pd on zmm: register form (plain, {%k2}, {%k2} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to8} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xa2,0xa5,0x40,0xb7,0xc5]
+ vfmsubadd231pd %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xa5,0x42,0xb7,0xc5]
+ vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2}
+
+// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0xa5,0xc2,0xb7,0xc5]
+ vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} {z}
+
+// CHECK: vfmsubadd231pd {rn-sae}, %zmm21, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xa2,0xa5,0x10,0xb7,0xc5]
+ vfmsubadd231pd {rn-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd {ru-sae}, %zmm21, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xa2,0xa5,0x50,0xb7,0xc5]
+ vfmsubadd231pd {ru-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd {rd-sae}, %zmm21, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xa2,0xa5,0x30,0xb7,0xc5]
+ vfmsubadd231pd {rd-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd {rz-sae}, %zmm21, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xa2,0xa5,0x70,0xb7,0xc5]
+ vfmsubadd231pd {rz-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd (%rcx), %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x01]
+ vfmsubadd231pd (%rcx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xa2,0xa5,0x40,0xb7,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd231pd 291(%rax,%r14,8), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd (%rcx){1to8}, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x01]
+ vfmsubadd231pd (%rcx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 8128(%rdx), %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x42,0x7f]
+ vfmsubadd231pd 8128(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 8192(%rdx), %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x82,0x00,0x20,0x00,0x00]
+ vfmsubadd231pd 8192(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -8192(%rdx), %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x42,0x80]
+ vfmsubadd231pd -8192(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -8256(%rdx), %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x82,0xc0,0xdf,0xff,0xff]
+ vfmsubadd231pd -8256(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 1016(%rdx){1to8}, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x42,0x7f]
+ vfmsubadd231pd 1016(%rdx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 1024(%rdx){1to8}, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x82,0x00,0x04,0x00,0x00]
+ vfmsubadd231pd 1024(%rdx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -1024(%rdx){1to8}, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x42,0x80]
+ vfmsubadd231pd -1024(%rdx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -1032(%rdx){1to8}, %zmm27, %zmm16
+// CHECK: encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x82,0xf8,0xfb,0xff,0xff]
+ vfmsubadd231pd -1032(%rdx){1to8}, %zmm27, %zmm16
+// vfnmadd132ps on zmm: register form (plain, {%k5}, {%k5} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to16} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xc2,0x7d,0x40,0x9c,0xea]
+ vfnmadd132ps %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5}
+// CHECK: encoding: [0x62,0xc2,0x7d,0x45,0x9c,0xea]
+ vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5}
+
+// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5} {z}
+// CHECK: encoding: [0x62,0xc2,0x7d,0xc5,0x9c,0xea]
+ vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5} {z}
+
+// CHECK: vfnmadd132ps {rn-sae}, %zmm10, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xc2,0x7d,0x10,0x9c,0xea]
+ vfnmadd132ps {rn-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps {ru-sae}, %zmm10, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xc2,0x7d,0x50,0x9c,0xea]
+ vfnmadd132ps {ru-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps {rd-sae}, %zmm10, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xc2,0x7d,0x30,0x9c,0xea]
+ vfnmadd132ps {rd-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps {rz-sae}, %zmm10, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xc2,0x7d,0x70,0x9c,0xea]
+ vfnmadd132ps {rz-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps (%rcx), %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x29]
+ vfnmadd132ps (%rcx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 291(%rax,%r14,8), %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x40,0x9c,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd132ps 291(%rax,%r14,8), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps (%rcx){1to16}, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x29]
+ vfnmadd132ps (%rcx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 8128(%rdx), %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x6a,0x7f]
+ vfnmadd132ps 8128(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 8192(%rdx), %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0xaa,0x00,0x20,0x00,0x00]
+ vfnmadd132ps 8192(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -8192(%rdx), %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x6a,0x80]
+ vfnmadd132ps -8192(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -8256(%rdx), %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x40,0x9c,0xaa,0xc0,0xdf,0xff,0xff]
+ vfnmadd132ps -8256(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 508(%rdx){1to16}, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x6a,0x7f]
+ vfnmadd132ps 508(%rdx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 512(%rdx){1to16}, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0xaa,0x00,0x02,0x00,0x00]
+ vfnmadd132ps 512(%rdx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -512(%rdx){1to16}, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x6a,0x80]
+ vfnmadd132ps -512(%rdx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -516(%rdx){1to16}, %zmm16, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x50,0x9c,0xaa,0xfc,0xfd,0xff,0xff]
+ vfnmadd132ps -516(%rdx){1to16}, %zmm16, %zmm21
+// vfnmadd132pd on zmm: register form (plain, {%k7}, {%k7} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to8} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0xe1]
+ vfnmadd132pd %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7}
+// CHECK: encoding: [0x62,0x72,0x8d,0x4f,0x9c,0xe1]
+ vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7}
+
+// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} {z}
+// CHECK: encoding: [0x62,0x72,0x8d,0xcf,0x9c,0xe1]
+ vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} {z}
+
+// CHECK: vfnmadd132pd {rn-sae}, %zmm1, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x18,0x9c,0xe1]
+ vfnmadd132pd {rn-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd {ru-sae}, %zmm1, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0xe1]
+ vfnmadd132pd {ru-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd {rd-sae}, %zmm1, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x38,0x9c,0xe1]
+ vfnmadd132pd {rd-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd {rz-sae}, %zmm1, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x78,0x9c,0xe1]
+ vfnmadd132pd {rz-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd (%rcx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0x21]
+ vfnmadd132pd (%rcx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 291(%rax,%r14,8), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x32,0x8d,0x48,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd132pd 291(%rax,%r14,8), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd (%rcx){1to8}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0x21]
+ vfnmadd132pd (%rcx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 8128(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0x62,0x7f]
+ vfnmadd132pd 8128(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 8192(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0xa2,0x00,0x20,0x00,0x00]
+ vfnmadd132pd 8192(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -8192(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0x62,0x80]
+ vfnmadd132pd -8192(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -8256(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x48,0x9c,0xa2,0xc0,0xdf,0xff,0xff]
+ vfnmadd132pd -8256(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 1016(%rdx){1to8}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0x62,0x7f]
+ vfnmadd132pd 1016(%rdx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 1024(%rdx){1to8}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0xa2,0x00,0x04,0x00,0x00]
+ vfnmadd132pd 1024(%rdx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -1024(%rdx){1to8}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0x62,0x80]
+ vfnmadd132pd -1024(%rdx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -1032(%rdx){1to8}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x72,0x8d,0x58,0x9c,0xa2,0xf8,0xfb,0xff,0xff]
+ vfnmadd132pd -1032(%rdx){1to8}, %zmm14, %zmm12
+// vfnmadd213ps on zmm: register form (plain, {%k6}, {%k6} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to16} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0xd6]
+ vfnmadd213ps %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6}
+// CHECK: encoding: [0x62,0x62,0x2d,0x4e,0xac,0xd6]
+ vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6}
+
+// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x62,0x2d,0xce,0xac,0xd6]
+ vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} {z}
+
+// CHECK: vfnmadd213ps {rn-sae}, %zmm6, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x18,0xac,0xd6]
+ vfnmadd213ps {rn-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps {ru-sae}, %zmm6, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0xd6]
+ vfnmadd213ps {ru-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps {rd-sae}, %zmm6, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x38,0xac,0xd6]
+ vfnmadd213ps {rd-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps {rz-sae}, %zmm6, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x78,0xac,0xd6]
+ vfnmadd213ps {rz-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps (%rcx), %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x11]
+ vfnmadd213ps (%rcx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 291(%rax,%r14,8), %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x22,0x2d,0x48,0xac,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd213ps 291(%rax,%r14,8), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps (%rcx){1to16}, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x11]
+ vfnmadd213ps (%rcx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 8128(%rdx), %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x52,0x7f]
+ vfnmadd213ps 8128(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 8192(%rdx), %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x92,0x00,0x20,0x00,0x00]
+ vfnmadd213ps 8192(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -8192(%rdx), %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x52,0x80]
+ vfnmadd213ps -8192(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -8256(%rdx), %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x48,0xac,0x92,0xc0,0xdf,0xff,0xff]
+ vfnmadd213ps -8256(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 508(%rdx){1to16}, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x52,0x7f]
+ vfnmadd213ps 508(%rdx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 512(%rdx){1to16}, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x92,0x00,0x02,0x00,0x00]
+ vfnmadd213ps 512(%rdx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -512(%rdx){1to16}, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x52,0x80]
+ vfnmadd213ps -512(%rdx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -516(%rdx){1to16}, %zmm10, %zmm26
+// CHECK: encoding: [0x62,0x62,0x2d,0x58,0xac,0x92,0xfc,0xfd,0xff,0xff]
+ vfnmadd213ps -516(%rdx){1to16}, %zmm10, %zmm26
+// vfnmadd213pd on zmm: register form (plain, {%k4}, {%k4} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to8} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xc2,0xfd,0x40,0xac,0xc9]
+ vfnmadd213pd %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4}
+// CHECK: encoding: [0x62,0xc2,0xfd,0x44,0xac,0xc9]
+ vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4}
+
+// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} {z}
+// CHECK: encoding: [0x62,0xc2,0xfd,0xc4,0xac,0xc9]
+ vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} {z}
+
+// CHECK: vfnmadd213pd {rn-sae}, %zmm9, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xc2,0xfd,0x10,0xac,0xc9]
+ vfnmadd213pd {rn-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd {ru-sae}, %zmm9, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xc2,0xfd,0x50,0xac,0xc9]
+ vfnmadd213pd {ru-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd {rd-sae}, %zmm9, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xc2,0xfd,0x30,0xac,0xc9]
+ vfnmadd213pd {rd-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd {rz-sae}, %zmm9, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xc2,0xfd,0x70,0xac,0xc9]
+ vfnmadd213pd {rz-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd (%rcx), %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x09]
+ vfnmadd213pd (%rcx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 291(%rax,%r14,8), %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x40,0xac,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd213pd 291(%rax,%r14,8), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd (%rcx){1to8}, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x09]
+ vfnmadd213pd (%rcx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 8128(%rdx), %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x4a,0x7f]
+ vfnmadd213pd 8128(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 8192(%rdx), %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x8a,0x00,0x20,0x00,0x00]
+ vfnmadd213pd 8192(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -8192(%rdx), %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x4a,0x80]
+ vfnmadd213pd -8192(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -8256(%rdx), %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x40,0xac,0x8a,0xc0,0xdf,0xff,0xff]
+ vfnmadd213pd -8256(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x4a,0x7f]
+ vfnmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x8a,0x00,0x04,0x00,0x00]
+ vfnmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x4a,0x80]
+ vfnmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x50,0xac,0x8a,0xf8,0xfb,0xff,0xff]
+ vfnmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm17
+// vfnmadd231ps on zmm: register form (plain, {%k5}, {%k5} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to16} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x12,0x45,0x48,0xbc,0xf0]
+ vfnmadd231ps %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5}
+// CHECK: encoding: [0x62,0x12,0x45,0x4d,0xbc,0xf0]
+ vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5}
+
+// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} {z}
+// CHECK: encoding: [0x62,0x12,0x45,0xcd,0xbc,0xf0]
+ vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} {z}
+
+// CHECK: vfnmadd231ps {rn-sae}, %zmm24, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x12,0x45,0x18,0xbc,0xf0]
+ vfnmadd231ps {rn-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps {ru-sae}, %zmm24, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x12,0x45,0x58,0xbc,0xf0]
+ vfnmadd231ps {ru-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps {rd-sae}, %zmm24, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x12,0x45,0x38,0xbc,0xf0]
+ vfnmadd231ps {rd-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps {rz-sae}, %zmm24, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x12,0x45,0x78,0xbc,0xf0]
+ vfnmadd231ps {rz-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps (%rcx), %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0x31]
+ vfnmadd231ps (%rcx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 291(%rax,%r14,8), %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x32,0x45,0x48,0xbc,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd231ps 291(%rax,%r14,8), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps (%rcx){1to16}, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0x31]
+ vfnmadd231ps (%rcx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 8128(%rdx), %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0x72,0x7f]
+ vfnmadd231ps 8128(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 8192(%rdx), %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0xb2,0x00,0x20,0x00,0x00]
+ vfnmadd231ps 8192(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -8192(%rdx), %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0x72,0x80]
+ vfnmadd231ps -8192(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -8256(%rdx), %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x48,0xbc,0xb2,0xc0,0xdf,0xff,0xff]
+ vfnmadd231ps -8256(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 508(%rdx){1to16}, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0x72,0x7f]
+ vfnmadd231ps 508(%rdx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 512(%rdx){1to16}, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0xb2,0x00,0x02,0x00,0x00]
+ vfnmadd231ps 512(%rdx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -512(%rdx){1to16}, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0x72,0x80]
+ vfnmadd231ps -512(%rdx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -516(%rdx){1to16}, %zmm7, %zmm14
+// CHECK: encoding: [0x62,0x72,0x45,0x58,0xbc,0xb2,0xfc,0xfd,0xff,0xff]
+ vfnmadd231ps -516(%rdx){1to16}, %zmm7, %zmm14
+// vfnmadd231pd on zmm: register form (plain, {%k6}, {%k6} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to8} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x32,0xa5,0x48,0xbc,0xe0]
+ vfnmadd231pd %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6}
+// CHECK: encoding: [0x62,0x32,0xa5,0x4e,0xbc,0xe0]
+ vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6}
+
+// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} {z}
+// CHECK: encoding: [0x62,0x32,0xa5,0xce,0xbc,0xe0]
+ vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} {z}
+
+// CHECK: vfnmadd231pd {rn-sae}, %zmm16, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x32,0xa5,0x18,0xbc,0xe0]
+ vfnmadd231pd {rn-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd {ru-sae}, %zmm16, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x32,0xa5,0x58,0xbc,0xe0]
+ vfnmadd231pd {ru-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd {rd-sae}, %zmm16, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x32,0xa5,0x38,0xbc,0xe0]
+ vfnmadd231pd {rd-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd {rz-sae}, %zmm16, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x32,0xa5,0x78,0xbc,0xe0]
+ vfnmadd231pd {rz-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd (%rcx), %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0x21]
+ vfnmadd231pd (%rcx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 291(%rax,%r14,8), %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x32,0xa5,0x48,0xbc,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd231pd 291(%rax,%r14,8), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd (%rcx){1to8}, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0x21]
+ vfnmadd231pd (%rcx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 8128(%rdx), %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0x62,0x7f]
+ vfnmadd231pd 8128(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 8192(%rdx), %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0xa2,0x00,0x20,0x00,0x00]
+ vfnmadd231pd 8192(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -8192(%rdx), %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0x62,0x80]
+ vfnmadd231pd -8192(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -8256(%rdx), %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x48,0xbc,0xa2,0xc0,0xdf,0xff,0xff]
+ vfnmadd231pd -8256(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 1016(%rdx){1to8}, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0x62,0x7f]
+ vfnmadd231pd 1016(%rdx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 1024(%rdx){1to8}, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0xa2,0x00,0x04,0x00,0x00]
+ vfnmadd231pd 1024(%rdx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -1024(%rdx){1to8}, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0x62,0x80]
+ vfnmadd231pd -1024(%rdx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -1032(%rdx){1to8}, %zmm11, %zmm12
+// CHECK: encoding: [0x62,0x72,0xa5,0x58,0xbc,0xa2,0xf8,0xfb,0xff,0xff]
+ vfnmadd231pd -1032(%rdx){1to8}, %zmm11, %zmm12
+// vfnmsub132ps on zmm: register form (plain, {%k2}, {%k2} {z}), the {rn,ru,rd,rz}-sae forms, then full-width and {1to16} memory operands whose displacements sit on both sides of the 1-byte/4-byte displacement switch.
+// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xe6]
+ vfnmsub132ps %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x42,0x9e,0xe6]
+ vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2}
+
+// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} {z}
+// CHECK: encoding: [0x62,0xf2,0x6d,0xc2,0x9e,0xe6]
+ vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} {z}
+
+// CHECK: vfnmsub132ps {rn-sae}, %zmm6, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x10,0x9e,0xe6]
+ vfnmsub132ps {rn-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps {ru-sae}, %zmm6, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xe6]
+ vfnmsub132ps {ru-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps {rd-sae}, %zmm6, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x30,0x9e,0xe6]
+ vfnmsub132ps {rd-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps {rz-sae}, %zmm6, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x70,0x9e,0xe6]
+ vfnmsub132ps {rz-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps (%rcx), %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x21]
+ vfnmsub132ps (%rcx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 291(%rax,%r14,8), %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xb2,0x6d,0x40,0x9e,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub132ps 291(%rax,%r14,8), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps (%rcx){1to16}, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x21]
+ vfnmsub132ps (%rcx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 8128(%rdx), %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x62,0x7f]
+ vfnmsub132ps 8128(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 8192(%rdx), %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xa2,0x00,0x20,0x00,0x00]
+ vfnmsub132ps 8192(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -8192(%rdx), %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x62,0x80]
+ vfnmsub132ps -8192(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -8256(%rdx), %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xa2,0xc0,0xdf,0xff,0xff]
+ vfnmsub132ps -8256(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 508(%rdx){1to16}, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x62,0x7f]
+ vfnmsub132ps 508(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 512(%rdx){1to16}, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xa2,0x00,0x02,0x00,0x00]
+ vfnmsub132ps 512(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -512(%rdx){1to16}, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x62,0x80]
+ vfnmsub132ps -512(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -516(%rdx){1to16}, %zmm18, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xa2,0xfc,0xfd,0xff,0xff]
+ vfnmsub132ps -516(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0xe6]
+ vfnmsub132pd %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2}
+// CHECK: encoding: [0x62,0x62,0xd5,0x4a,0x9e,0xe6]
+ vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2}
+
+// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0xd5,0xca,0x9e,0xe6]
+ vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} {z}
+
+// CHECK: vfnmsub132pd {rn-sae}, %zmm6, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x18,0x9e,0xe6]
+ vfnmsub132pd {rn-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd {ru-sae}, %zmm6, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0xe6]
+ vfnmsub132pd {ru-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd {rd-sae}, %zmm6, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x38,0x9e,0xe6]
+ vfnmsub132pd {rd-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd {rz-sae}, %zmm6, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x78,0x9e,0xe6]
+ vfnmsub132pd {rz-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd (%rcx), %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0x21]
+ vfnmsub132pd (%rcx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 291(%rax,%r14,8), %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x22,0xd5,0x48,0x9e,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub132pd 291(%rax,%r14,8), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd (%rcx){1to8}, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0x21]
+ vfnmsub132pd (%rcx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 8128(%rdx), %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0x62,0x7f]
+ vfnmsub132pd 8128(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 8192(%rdx), %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0xa2,0x00,0x20,0x00,0x00]
+ vfnmsub132pd 8192(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -8192(%rdx), %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0x62,0x80]
+ vfnmsub132pd -8192(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -8256(%rdx), %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x48,0x9e,0xa2,0xc0,0xdf,0xff,0xff]
+ vfnmsub132pd -8256(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 1016(%rdx){1to8}, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0x62,0x7f]
+ vfnmsub132pd 1016(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 1024(%rdx){1to8}, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0xa2,0x00,0x04,0x00,0x00]
+ vfnmsub132pd 1024(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -1024(%rdx){1to8}, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0x62,0x80]
+ vfnmsub132pd -1024(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -1032(%rdx){1to8}, %zmm5, %zmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x58,0x9e,0xa2,0xf8,0xfb,0xff,0xff]
+ vfnmsub132pd -1032(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0xea]
+ vfnmsub213ps %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x15,0x4b,0xae,0xea]
+ vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3}
+
+// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x15,0xcb,0xae,0xea]
+ vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} {z}
+
+// CHECK: vfnmsub213ps {rn-sae}, %zmm2, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x18,0xae,0xea]
+ vfnmsub213ps {rn-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps {ru-sae}, %zmm2, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0xea]
+ vfnmsub213ps {ru-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps {rd-sae}, %zmm2, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x38,0xae,0xea]
+ vfnmsub213ps {rd-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps {rz-sae}, %zmm2, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x78,0xae,0xea]
+ vfnmsub213ps {rz-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps (%rcx), %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0x29]
+ vfnmsub213ps (%rcx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 291(%rax,%r14,8), %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x15,0x48,0xae,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub213ps 291(%rax,%r14,8), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps (%rcx){1to16}, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0x29]
+ vfnmsub213ps (%rcx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 8128(%rdx), %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0x6a,0x7f]
+ vfnmsub213ps 8128(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 8192(%rdx), %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0xaa,0x00,0x20,0x00,0x00]
+ vfnmsub213ps 8192(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -8192(%rdx), %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0x6a,0x80]
+ vfnmsub213ps -8192(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -8256(%rdx), %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x48,0xae,0xaa,0xc0,0xdf,0xff,0xff]
+ vfnmsub213ps -8256(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 508(%rdx){1to16}, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0x6a,0x7f]
+ vfnmsub213ps 508(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 512(%rdx){1to16}, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0xaa,0x00,0x02,0x00,0x00]
+ vfnmsub213ps 512(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -512(%rdx){1to16}, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0x6a,0x80]
+ vfnmsub213ps -512(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -516(%rdx){1to16}, %zmm13, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x15,0x58,0xae,0xaa,0xfc,0xfd,0xff,0xff]
+ vfnmsub213ps -516(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xc2,0xed,0x40,0xae,0xfb]
+ vfnmsub213pd %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2}
+// CHECK: encoding: [0x62,0xc2,0xed,0x42,0xae,0xfb]
+ vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2}
+
+// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0xc2,0xed,0xc2,0xae,0xfb]
+ vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2} {z}
+
+// CHECK: vfnmsub213pd {rn-sae}, %zmm11, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xc2,0xed,0x10,0xae,0xfb]
+ vfnmsub213pd {rn-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd {ru-sae}, %zmm11, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xc2,0xed,0x50,0xae,0xfb]
+ vfnmsub213pd {ru-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd {rd-sae}, %zmm11, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xc2,0xed,0x30,0xae,0xfb]
+ vfnmsub213pd {rd-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd {rz-sae}, %zmm11, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xc2,0xed,0x70,0xae,0xfb]
+ vfnmsub213pd {rz-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd (%rcx), %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0x39]
+ vfnmsub213pd (%rcx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 291(%rax,%r14,8), %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xa2,0xed,0x40,0xae,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub213pd 291(%rax,%r14,8), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd (%rcx){1to8}, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0x39]
+ vfnmsub213pd (%rcx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 8128(%rdx), %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0x7a,0x7f]
+ vfnmsub213pd 8128(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 8192(%rdx), %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0xba,0x00,0x20,0x00,0x00]
+ vfnmsub213pd 8192(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -8192(%rdx), %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0x7a,0x80]
+ vfnmsub213pd -8192(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -8256(%rdx), %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x40,0xae,0xba,0xc0,0xdf,0xff,0xff]
+ vfnmsub213pd -8256(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 1016(%rdx){1to8}, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0x7a,0x7f]
+ vfnmsub213pd 1016(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 1024(%rdx){1to8}, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0xba,0x00,0x04,0x00,0x00]
+ vfnmsub213pd 1024(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -1024(%rdx){1to8}, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0x7a,0x80]
+ vfnmsub213pd -1024(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -1032(%rdx){1to8}, %zmm18, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xed,0x50,0xae,0xba,0xf8,0xfb,0xff,0xff]
+ vfnmsub213pd -1032(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x52,0x4d,0x48,0xbe,0xc5]
+ vfnmsub231ps %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2}
+// CHECK: encoding: [0x62,0x52,0x4d,0x4a,0xbe,0xc5]
+ vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2}
+
+// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} {z}
+// CHECK: encoding: [0x62,0x52,0x4d,0xca,0xbe,0xc5]
+ vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} {z}
+
+// CHECK: vfnmsub231ps {rn-sae}, %zmm13, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x52,0x4d,0x18,0xbe,0xc5]
+ vfnmsub231ps {rn-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps {ru-sae}, %zmm13, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x52,0x4d,0x58,0xbe,0xc5]
+ vfnmsub231ps {ru-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps {rd-sae}, %zmm13, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x52,0x4d,0x38,0xbe,0xc5]
+ vfnmsub231ps {rd-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps {rz-sae}, %zmm13, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x52,0x4d,0x78,0xbe,0xc5]
+ vfnmsub231ps {rz-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps (%rcx), %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x01]
+ vfnmsub231ps (%rcx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 291(%rax,%r14,8), %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x32,0x4d,0x48,0xbe,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub231ps 291(%rax,%r14,8), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps (%rcx){1to16}, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x01]
+ vfnmsub231ps (%rcx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 8128(%rdx), %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x42,0x7f]
+ vfnmsub231ps 8128(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 8192(%rdx), %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x82,0x00,0x20,0x00,0x00]
+ vfnmsub231ps 8192(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -8192(%rdx), %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x42,0x80]
+ vfnmsub231ps -8192(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -8256(%rdx), %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x48,0xbe,0x82,0xc0,0xdf,0xff,0xff]
+ vfnmsub231ps -8256(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 508(%rdx){1to16}, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x42,0x7f]
+ vfnmsub231ps 508(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 512(%rdx){1to16}, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x82,0x00,0x02,0x00,0x00]
+ vfnmsub231ps 512(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -512(%rdx){1to16}, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x42,0x80]
+ vfnmsub231ps -512(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -516(%rdx){1to16}, %zmm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x4d,0x58,0xbe,0x82,0xfc,0xfd,0xff,0xff]
+ vfnmsub231ps -516(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x02,0xdd,0x48,0xbe,0xe8]
+ vfnmsub231pd %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7}
+// CHECK: encoding: [0x62,0x02,0xdd,0x4f,0xbe,0xe8]
+ vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7}
+
+// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0xdd,0xcf,0xbe,0xe8]
+ vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} {z}
+
+// CHECK: vfnmsub231pd {rn-sae}, %zmm24, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x02,0xdd,0x18,0xbe,0xe8]
+ vfnmsub231pd {rn-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd {ru-sae}, %zmm24, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x02,0xdd,0x58,0xbe,0xe8]
+ vfnmsub231pd {ru-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd {rd-sae}, %zmm24, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x02,0xdd,0x38,0xbe,0xe8]
+ vfnmsub231pd {rd-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd {rz-sae}, %zmm24, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x02,0xdd,0x78,0xbe,0xe8]
+ vfnmsub231pd {rz-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd (%rcx), %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0x29]
+ vfnmsub231pd (%rcx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 291(%rax,%r14,8), %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x22,0xdd,0x48,0xbe,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub231pd 291(%rax,%r14,8), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd (%rcx){1to8}, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0x29]
+ vfnmsub231pd (%rcx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 8128(%rdx), %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0x6a,0x7f]
+ vfnmsub231pd 8128(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 8192(%rdx), %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0xaa,0x00,0x20,0x00,0x00]
+ vfnmsub231pd 8192(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -8192(%rdx), %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0x6a,0x80]
+ vfnmsub231pd -8192(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -8256(%rdx), %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x48,0xbe,0xaa,0xc0,0xdf,0xff,0xff]
+ vfnmsub231pd -8256(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 1016(%rdx){1to8}, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0x6a,0x7f]
+ vfnmsub231pd 1016(%rdx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 1024(%rdx){1to8}, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0xaa,0x00,0x04,0x00,0x00]
+ vfnmsub231pd 1024(%rdx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -1024(%rdx){1to8}, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0x6a,0x80]
+ vfnmsub231pd -1024(%rdx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -1032(%rdx){1to8}, %zmm4, %zmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x58,0xbe,0xaa,0xf8,0xfb,0xff,0xff]
+ vfnmsub231pd -1032(%rdx){1to8}, %zmm4, %zmm29
+
+
+// CHECK: vfmadd231ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0xb8,0x9a,0x00,0x20,0x00,0x00]
+ vfmadd231ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0x93,0xb8,0xd9]
+ vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0xb8,0x82,0xf8,0xfb,0xff,0xff]
+ vfmadd231ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd231pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0xb8,0x9a,0x00,0x20,0x00,0x00]
+ vfmadd231pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x9d,0x93,0xb8,0xd9]
+ vfmadd231pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0xb8,0x82,0xf8,0xfb,0xff,0xff]
+ vfmadd231pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd213ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0xa8,0x9a,0x00,0x20,0x00,0x00]
+ vfmadd213ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0x93,0xa8,0xd9]
+ vfmadd213ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0xa8,0x82,0xf8,0xfb,0xff,0xff]
+ vfmadd213ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd213pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0xa8,0x9a,0x00,0x20,0x00,0x00]
+ vfmadd213pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x9d,0x93,0xa8,0xd9]
+ vfmadd213pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0xa8,0x82,0xf8,0xfb,0xff,0xff]
+ vfmadd213pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd132ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0x98,0x9a,0x00,0x20,0x00,0x00]
+ vfmadd132ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0x93,0x98,0xd9]
+ vfmadd132ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0x98,0x82,0xf8,0xfb,0xff,0xff]
+ vfmadd132ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd132pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0x98,0x9a,0x00,0x20,0x00,0x00]
+ vfmadd132pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x9d,0x93,0x98,0xd9]
+ vfmadd132pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0x98,0x82,0xf8,0xfb,0xff,0xff]
+ vfmadd132pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+
// CHECK: vpermi2d %zmm4, %zmm28, %zmm10
// CHECK: encoding: [0x62,0x72,0x1d,0x40,0x76,0xd4]
vpermi2d %zmm4, %zmm28, %zmm10
// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
valignq $0x7b, -1032(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x22,0x35,0x00,0x98,0xeb]
+ vfmadd132ps %xmm19, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4}
+// CHECK: encoding: [0x62,0x22,0x35,0x04,0x98,0xeb]
+ vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4}
+
+// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x35,0x84,0x98,0xeb]
+ vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} {z}
+
+// CHECK: vfmadd132ps (%rcx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0x29]
+ vfmadd132ps (%rcx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 291(%rax,%r14,8), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x22,0x35,0x00,0x98,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd132ps 291(%rax,%r14,8), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps (%rcx){1to4}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0x29]
+ vfmadd132ps (%rcx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 2032(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0x6a,0x7f]
+ vfmadd132ps 2032(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 2048(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0xaa,0x00,0x08,0x00,0x00]
+ vfmadd132ps 2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -2048(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0x6a,0x80]
+ vfmadd132ps -2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -2064(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x00,0x98,0xaa,0xf0,0xf7,0xff,0xff]
+ vfmadd132ps -2064(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 508(%rdx){1to4}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0x6a,0x7f]
+ vfmadd132ps 508(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 512(%rdx){1to4}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0xaa,0x00,0x02,0x00,0x00]
+ vfmadd132ps 512(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -512(%rdx){1to4}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0x6a,0x80]
+ vfmadd132ps -512(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -516(%rdx){1to4}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0x35,0x10,0x98,0xaa,0xfc,0xfd,0xff,0xff]
+ vfmadd132ps -516(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23
+// CHECK: encoding: [0x62,0x82,0x4d,0x20,0x98,0xfa]
+ vfmadd132ps %ymm26, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5}
+// CHECK: encoding: [0x62,0x82,0x4d,0x25,0x98,0xfa]
+ vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5}
+
+// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0x4d,0xa5,0x98,0xfa]
+ vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} {z}
+
+// CHECK: vfmadd132ps (%rcx), %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0x39]
+ vfmadd132ps (%rcx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 291(%rax,%r14,8), %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x98,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd132ps 291(%rax,%r14,8), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps (%rcx){1to8}, %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0x39]
+ vfmadd132ps (%rcx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 4064(%rdx), %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0x7a,0x7f]
+ vfmadd132ps 4064(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 4096(%rdx), %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0xba,0x00,0x10,0x00,0x00]
+ vfmadd132ps 4096(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -4096(%rdx), %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0x7a,0x80]
+ vfmadd132ps -4096(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -4128(%rdx), %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x98,0xba,0xe0,0xef,0xff,0xff]
+ vfmadd132ps -4128(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 508(%rdx){1to8}, %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0x7a,0x7f]
+ vfmadd132ps 508(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 512(%rdx){1to8}, %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0xba,0x00,0x02,0x00,0x00]
+ vfmadd132ps 512(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -512(%rdx){1to8}, %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0x7a,0x80]
+ vfmadd132ps -512(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -516(%rdx){1to8}, %ymm22, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x4d,0x30,0x98,0xba,0xfc,0xfd,0xff,0xff]
+ vfmadd132ps -516(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x02,0xd5,0x00,0x98,0xe3]
+ vfmadd132pd %xmm27, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1}
+// CHECK: encoding: [0x62,0x02,0xd5,0x01,0x98,0xe3]
+ vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1}
+
+// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0xd5,0x81,0x98,0xe3]
+ vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} {z}
+
+// CHECK: vfmadd132pd (%rcx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0x21]
+ vfmadd132pd (%rcx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 291(%rax,%r14,8), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x22,0xd5,0x00,0x98,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd132pd 291(%rax,%r14,8), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd (%rcx){1to2}, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0x21]
+ vfmadd132pd (%rcx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 2032(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0x62,0x7f]
+ vfmadd132pd 2032(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 2048(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0xa2,0x00,0x08,0x00,0x00]
+ vfmadd132pd 2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -2048(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0x62,0x80]
+ vfmadd132pd -2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -2064(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x98,0xa2,0xf0,0xf7,0xff,0xff]
+ vfmadd132pd -2064(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0x62,0x7f]
+ vfmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0xa2,0x00,0x04,0x00,0x00]
+ vfmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0x62,0x80]
+ vfmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x98,0xa2,0xf8,0xfb,0xff,0xff]
+ vfmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22
+// CHECK: encoding: [0x62,0x82,0xbd,0x20,0x98,0xf3]
+ vfmadd132pd %ymm27, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xbd,0x27,0x98,0xf3]
+ vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7}
+
+// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xbd,0xa7,0x98,0xf3]
+ vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132pd (%rcx), %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0x31]
+ vfmadd132pd (%rcx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 291(%rax,%r14,8), %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xa2,0xbd,0x20,0x98,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd132pd 291(%rax,%r14,8), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd (%rcx){1to4}, %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0x31]
+ vfmadd132pd (%rcx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 4064(%rdx), %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0x72,0x7f]
+ vfmadd132pd 4064(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 4096(%rdx), %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0xb2,0x00,0x10,0x00,0x00]
+ vfmadd132pd 4096(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -4096(%rdx), %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0x72,0x80]
+ vfmadd132pd -4096(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -4128(%rdx), %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x20,0x98,0xb2,0xe0,0xef,0xff,0xff]
+ vfmadd132pd -4128(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 1016(%rdx){1to4}, %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0x72,0x7f]
+ vfmadd132pd 1016(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 1024(%rdx){1to4}, %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0xb2,0x00,0x04,0x00,0x00]
+ vfmadd132pd 1024(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -1024(%rdx){1to4}, %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0x72,0x80]
+ vfmadd132pd -1024(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -1032(%rdx){1to4}, %ymm24, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xbd,0x30,0x98,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmadd132pd -1032(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x02,0x1d,0x00,0xa8,0xc4]
+ vfmadd213ps %xmm28, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1}
+// CHECK: encoding: [0x62,0x02,0x1d,0x01,0xa8,0xc4]
+ vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1}
+
+// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0x81,0xa8,0xc4]
+ vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} {z}
+
+// CHECK: vfmadd213ps (%rcx), %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x01]
+ vfmadd213ps (%rcx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 291(%rax,%r14,8), %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x22,0x1d,0x00,0xa8,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd213ps 291(%rax,%r14,8), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps (%rcx){1to4}, %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x01]
+ vfmadd213ps (%rcx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 2032(%rdx), %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x42,0x7f]
+ vfmadd213ps 2032(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 2048(%rdx), %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x82,0x00,0x08,0x00,0x00]
+ vfmadd213ps 2048(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -2048(%rdx), %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x42,0x80]
+ vfmadd213ps -2048(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -2064(%rdx), %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xa8,0x82,0xf0,0xf7,0xff,0xff]
+ vfmadd213ps -2064(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 508(%rdx){1to4}, %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x42,0x7f]
+ vfmadd213ps 508(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 512(%rdx){1to4}, %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x82,0x00,0x02,0x00,0x00]
+ vfmadd213ps 512(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -512(%rdx){1to4}, %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x42,0x80]
+ vfmadd213ps -512(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -516(%rdx){1to4}, %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xa8,0x82,0xfc,0xfd,0xff,0xff]
+ vfmadd213ps -516(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x22,0x4d,0x20,0xa8,0xd1]
+ vfmadd213ps %ymm17, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3}
+// CHECK: encoding: [0x62,0x22,0x4d,0x23,0xa8,0xd1]
+ vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3}
+
+// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0x4d,0xa3,0xa8,0xd1]
+ vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} {z}
+
+// CHECK: vfmadd213ps (%rcx), %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x11]
+ vfmadd213ps (%rcx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 291(%rax,%r14,8), %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x22,0x4d,0x20,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd213ps 291(%rax,%r14,8), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps (%rcx){1to8}, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x11]
+ vfmadd213ps (%rcx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 4064(%rdx), %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x52,0x7f]
+ vfmadd213ps 4064(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 4096(%rdx), %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x92,0x00,0x10,0x00,0x00]
+ vfmadd213ps 4096(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -4096(%rdx), %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x52,0x80]
+ vfmadd213ps -4096(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -4128(%rdx), %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x20,0xa8,0x92,0xe0,0xef,0xff,0xff]
+ vfmadd213ps -4128(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 508(%rdx){1to8}, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x52,0x7f]
+ vfmadd213ps 508(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 512(%rdx){1to8}, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x92,0x00,0x02,0x00,0x00]
+ vfmadd213ps 512(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -512(%rdx){1to8}, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x52,0x80]
+ vfmadd213ps -512(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -516(%rdx){1to8}, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x62,0x4d,0x30,0xa8,0x92,0xfc,0xfd,0xff,0xff]
+ vfmadd213ps -516(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0xa8,0xf7]
+ vfmadd213pd %xmm23, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4}
+// CHECK: encoding: [0x62,0xa2,0xd5,0x04,0xa8,0xf7]
+ vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4}
+
+// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0xd5,0x84,0xa8,0xf7]
+ vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4} {z}
+
+// CHECK: vfmadd213pd (%rcx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x31]
+ vfmadd213pd (%rcx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 291(%rax,%r14,8), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0xa8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd213pd 291(%rax,%r14,8), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd (%rcx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x31]
+ vfmadd213pd (%rcx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 2032(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x72,0x7f]
+ vfmadd213pd 2032(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 2048(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0xb2,0x00,0x08,0x00,0x00]
+ vfmadd213pd 2048(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -2048(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x72,0x80]
+ vfmadd213pd -2048(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -2064(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa8,0xb2,0xf0,0xf7,0xff,0xff]
+ vfmadd213pd -2064(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 1016(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x72,0x7f]
+ vfmadd213pd 1016(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 1024(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0xb2,0x00,0x04,0x00,0x00]
+ vfmadd213pd 1024(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -1024(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x72,0x80]
+ vfmadd213pd -1024(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -1032(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa8,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmadd213pd -1032(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xa2,0xe5,0x20,0xa8,0xd1]
+ vfmadd213pd %ymm17, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xe5,0x21,0xa8,0xd1]
+ vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1}
+
+// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xe5,0xa1,0xa8,0xd1]
+ vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} {z}
+
+// CHECK: vfmadd213pd (%rcx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x11]
+ vfmadd213pd (%rcx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 291(%rax,%r14,8), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xa2,0xe5,0x20,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd213pd 291(%rax,%r14,8), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd (%rcx){1to4}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x11]
+ vfmadd213pd (%rcx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 4064(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x52,0x7f]
+ vfmadd213pd 4064(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 4096(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x92,0x00,0x10,0x00,0x00]
+ vfmadd213pd 4096(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -4096(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x52,0x80]
+ vfmadd213pd -4096(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -4128(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x92,0xe0,0xef,0xff,0xff]
+ vfmadd213pd -4128(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 1016(%rdx){1to4}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x52,0x7f]
+ vfmadd213pd 1016(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 1024(%rdx){1to4}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x92,0x00,0x04,0x00,0x00]
+ vfmadd213pd 1024(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -1024(%rdx){1to4}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x52,0x80]
+ vfmadd213pd -1024(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -1032(%rdx){1to4}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x92,0xf8,0xfb,0xff,0xff]
+ vfmadd213pd -1032(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x02,0x5d,0x00,0xb8,0xf3]
+ vfmadd231ps %xmm27, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x5d,0x07,0xb8,0xf3]
+ vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7}
+
+// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x5d,0x87,0xb8,0xf3]
+ vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} {z}
+
+// CHECK: vfmadd231ps (%rcx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0x31]
+ vfmadd231ps (%rcx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 291(%rax,%r14,8), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x22,0x5d,0x00,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd231ps 291(%rax,%r14,8), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps (%rcx){1to4}, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0x31]
+ vfmadd231ps (%rcx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 2032(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0x72,0x7f]
+ vfmadd231ps 2032(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0xb2,0x00,0x08,0x00,0x00]
+ vfmadd231ps 2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0x72,0x80]
+ vfmadd231ps -2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -2064(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x00,0xb8,0xb2,0xf0,0xf7,0xff,0xff]
+ vfmadd231ps -2064(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 508(%rdx){1to4}, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0x72,0x7f]
+ vfmadd231ps 508(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 512(%rdx){1to4}, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0xb2,0x00,0x02,0x00,0x00]
+ vfmadd231ps 512(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -512(%rdx){1to4}, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0x72,0x80]
+ vfmadd231ps -512(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -516(%rdx){1to4}, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x10,0xb8,0xb2,0xfc,0xfd,0xff,0xff]
+ vfmadd231ps -516(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22
+// CHECK: encoding: [0x62,0x82,0x2d,0x20,0xb8,0xf1]
+ vfmadd231ps %ymm25, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x2d,0x27,0xb8,0xf1]
+ vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7}
+
+// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x2d,0xa7,0xb8,0xf1]
+ vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231ps (%rcx), %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x31]
+ vfmadd231ps (%rcx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 291(%rax,%r14,8), %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd231ps 291(%rax,%r14,8), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps (%rcx){1to8}, %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x31]
+ vfmadd231ps (%rcx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 4064(%rdx), %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x72,0x7f]
+ vfmadd231ps 4064(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 4096(%rdx), %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0xb2,0x00,0x10,0x00,0x00]
+ vfmadd231ps 4096(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -4096(%rdx), %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x72,0x80]
+ vfmadd231ps -4096(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -4128(%rdx), %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xb8,0xb2,0xe0,0xef,0xff,0xff]
+ vfmadd231ps -4128(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 508(%rdx){1to8}, %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x72,0x7f]
+ vfmadd231ps 508(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 512(%rdx){1to8}, %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0xb2,0x00,0x02,0x00,0x00]
+ vfmadd231ps 512(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -512(%rdx){1to8}, %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x72,0x80]
+ vfmadd231ps -512(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -516(%rdx){1to8}, %ymm26, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xb8,0xb2,0xfc,0xfd,0xff,0xff]
+ vfmadd231ps -516(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x02,0xdd,0x00,0xb8,0xe8]
+ vfmadd231pd %xmm24, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7}
+// CHECK: encoding: [0x62,0x02,0xdd,0x07,0xb8,0xe8]
+ vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7}
+
+// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0xdd,0x87,0xb8,0xe8]
+ vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} {z}
+
+// CHECK: vfmadd231pd (%rcx), %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0x29]
+ vfmadd231pd (%rcx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 291(%rax,%r14,8), %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x22,0xdd,0x00,0xb8,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd231pd 291(%rax,%r14,8), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd (%rcx){1to2}, %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0x29]
+ vfmadd231pd (%rcx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 2032(%rdx), %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0x6a,0x7f]
+ vfmadd231pd 2032(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 2048(%rdx), %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0xaa,0x00,0x08,0x00,0x00]
+ vfmadd231pd 2048(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -2048(%rdx), %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0x6a,0x80]
+ vfmadd231pd -2048(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -2064(%rdx), %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xb8,0xaa,0xf0,0xf7,0xff,0xff]
+ vfmadd231pd -2064(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 1016(%rdx){1to2}, %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0x6a,0x7f]
+ vfmadd231pd 1016(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 1024(%rdx){1to2}, %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0xaa,0x00,0x04,0x00,0x00]
+ vfmadd231pd 1024(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -1024(%rdx){1to2}, %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0x6a,0x80]
+ vfmadd231pd -1024(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -1032(%rdx){1to2}, %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xb8,0xaa,0xf8,0xfb,0xff,0xff]
+ vfmadd231pd -1032(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x02,0xdd,0x20,0xb8,0xc2]
+ vfmadd231pd %ymm26, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6}
+// CHECK: encoding: [0x62,0x02,0xdd,0x26,0xb8,0xc2]
+ vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6}
+
+// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x02,0xdd,0xa6,0xb8,0xc2]
+ vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} {z}
+
+// CHECK: vfmadd231pd (%rcx), %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x01]
+ vfmadd231pd (%rcx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 291(%rax,%r14,8), %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x22,0xdd,0x20,0xb8,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfmadd231pd 291(%rax,%r14,8), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd (%rcx){1to4}, %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x01]
+ vfmadd231pd (%rcx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 4064(%rdx), %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x42,0x7f]
+ vfmadd231pd 4064(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 4096(%rdx), %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x82,0x00,0x10,0x00,0x00]
+ vfmadd231pd 4096(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -4096(%rdx), %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x42,0x80]
+ vfmadd231pd -4096(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -4128(%rdx), %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xb8,0x82,0xe0,0xef,0xff,0xff]
+ vfmadd231pd -4128(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 1016(%rdx){1to4}, %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x42,0x7f]
+ vfmadd231pd 1016(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 1024(%rdx){1to4}, %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x82,0x00,0x04,0x00,0x00]
+ vfmadd231pd 1024(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -1024(%rdx){1to4}, %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x42,0x80]
+ vfmadd231pd -1024(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -1032(%rdx){1to4}, %ymm20, %ymm24
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xb8,0x82,0xf8,0xfb,0xff,0xff]
+ vfmadd231pd -1032(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xa2,0x6d,0x00,0x9a,0xcd]
+ vfmsub132ps %xmm21, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x6d,0x01,0x9a,0xcd]
+ vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1}
+
+// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x6d,0x81,0x9a,0xcd]
+ vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} {z}
+
+// CHECK: vfmsub132ps (%rcx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x09]
+ vfmsub132ps (%rcx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 291(%rax,%r14,8), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xa2,0x6d,0x00,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub132ps 291(%rax,%r14,8), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps (%rcx){1to4}, %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x09]
+ vfmsub132ps (%rcx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 2032(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x4a,0x7f]
+ vfmsub132ps 2032(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 2048(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x8a,0x00,0x08,0x00,0x00]
+ vfmsub132ps 2048(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -2048(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x4a,0x80]
+ vfmsub132ps -2048(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -2064(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x8a,0xf0,0xf7,0xff,0xff]
+ vfmsub132ps -2064(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 508(%rdx){1to4}, %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x4a,0x7f]
+ vfmsub132ps 508(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 512(%rdx){1to4}, %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x8a,0x00,0x02,0x00,0x00]
+ vfmsub132ps 512(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -512(%rdx){1to4}, %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x4a,0x80]
+ vfmsub132ps -512(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -516(%rdx){1to4}, %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x8a,0xfc,0xfd,0xff,0xff]
+ vfmsub132ps -516(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x9a,0xcf]
+ vfmsub132ps %ymm23, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5}
+// CHECK: encoding: [0x62,0x22,0x1d,0x25,0x9a,0xcf]
+ vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5}
+
+// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x1d,0xa5,0x9a,0xcf]
+ vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} {z}
+
+// CHECK: vfmsub132ps (%rcx), %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x09]
+ vfmsub132ps (%rcx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 291(%rax,%r14,8), %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub132ps 291(%rax,%r14,8), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps (%rcx){1to8}, %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x09]
+ vfmsub132ps (%rcx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 4064(%rdx), %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x4a,0x7f]
+ vfmsub132ps 4064(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 4096(%rdx), %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x8a,0x00,0x10,0x00,0x00]
+ vfmsub132ps 4096(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -4096(%rdx), %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x4a,0x80]
+ vfmsub132ps -4096(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -4128(%rdx), %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x9a,0x8a,0xe0,0xef,0xff,0xff]
+ vfmsub132ps -4128(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 508(%rdx){1to8}, %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x4a,0x7f]
+ vfmsub132ps 508(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 512(%rdx){1to8}, %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x8a,0x00,0x02,0x00,0x00]
+ vfmsub132ps 512(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -512(%rdx){1to8}, %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x4a,0x80]
+ vfmsub132ps -512(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -516(%rdx){1to8}, %ymm28, %ymm25
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x9a,0x8a,0xfc,0xfd,0xff,0xff]
+ vfmsub132ps -516(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0x9a,0xd4]
+ vfmsub132pd %xmm20, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xd5,0x01,0x9a,0xd4]
+ vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1}
+
+// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xd5,0x81,0x9a,0xd4]
+ vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1} {z}
+
+// CHECK: vfmsub132pd (%rcx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x11]
+ vfmsub132pd (%rcx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 291(%rax,%r14,8), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0x9a,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub132pd 291(%rax,%r14,8), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd (%rcx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x11]
+ vfmsub132pd (%rcx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 2032(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x52,0x7f]
+ vfmsub132pd 2032(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x92,0x00,0x08,0x00,0x00]
+ vfmsub132pd 2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x52,0x80]
+ vfmsub132pd -2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -2064(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x92,0xf0,0xf7,0xff,0xff]
+ vfmsub132pd -2064(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 1016(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x52,0x7f]
+ vfmsub132pd 1016(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x92,0x00,0x04,0x00,0x00]
+ vfmsub132pd 1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x52,0x80]
+ vfmsub132pd -1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -1032(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x92,0xf8,0xfb,0xff,0xff]
+ vfmsub132pd -1032(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x9d,0x20,0x9a,0xf1]
+ vfmsub132pd %ymm17, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x9d,0x25,0x9a,0xf1]
+ vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5}
+
+// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x9d,0xa5,0x9a,0xf1]
+ vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} {z}
+
+// CHECK: vfmsub132pd (%rcx), %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x31]
+ vfmsub132pd (%rcx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 291(%rax,%r14,8), %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x9d,0x20,0x9a,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub132pd 291(%rax,%r14,8), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd (%rcx){1to4}, %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x31]
+ vfmsub132pd (%rcx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 4064(%rdx), %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x72,0x7f]
+ vfmsub132pd 4064(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 4096(%rdx), %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0xb2,0x00,0x10,0x00,0x00]
+ vfmsub132pd 4096(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -4096(%rdx), %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x72,0x80]
+ vfmsub132pd -4096(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -4128(%rdx), %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9a,0xb2,0xe0,0xef,0xff,0xff]
+ vfmsub132pd -4128(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x72,0x7f]
+ vfmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0xb2,0x00,0x04,0x00,0x00]
+ vfmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x72,0x80]
+ vfmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9a,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22
+// CHECK: encoding: [0x62,0x82,0x25,0x00,0xaa,0xf4]
+ vfmsub213ps %xmm28, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2}
+// CHECK: encoding: [0x62,0x82,0x25,0x02,0xaa,0xf4]
+ vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2}
+
+// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x25,0x82,0xaa,0xf4]
+ vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} {z}
+
+// CHECK: vfmsub213ps (%rcx), %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0x31]
+ vfmsub213ps (%rcx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 291(%rax,%r14,8), %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x25,0x00,0xaa,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub213ps 291(%rax,%r14,8), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps (%rcx){1to4}, %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0x31]
+ vfmsub213ps (%rcx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 2032(%rdx), %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0x72,0x7f]
+ vfmsub213ps 2032(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 2048(%rdx), %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0xb2,0x00,0x08,0x00,0x00]
+ vfmsub213ps 2048(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -2048(%rdx), %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0x72,0x80]
+ vfmsub213ps -2048(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -2064(%rdx), %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xaa,0xb2,0xf0,0xf7,0xff,0xff]
+ vfmsub213ps -2064(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 508(%rdx){1to4}, %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0x72,0x7f]
+ vfmsub213ps 508(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 512(%rdx){1to4}, %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0xb2,0x00,0x02,0x00,0x00]
+ vfmsub213ps 512(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -512(%rdx){1to4}, %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0x72,0x80]
+ vfmsub213ps -512(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -516(%rdx){1to4}, %xmm27, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xaa,0xb2,0xfc,0xfd,0xff,0xff]
+ vfmsub213ps -516(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x22,0x15,0x20,0xaa,0xe6]
+ vfmsub213ps %ymm22, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1}
+// CHECK: encoding: [0x62,0x22,0x15,0x21,0xaa,0xe6]
+ vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1}
+
+// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} {z}
+// CHECK: encoding: [0x62,0x22,0x15,0xa1,0xaa,0xe6]
+ vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} {z}
+
+// CHECK: vfmsub213ps (%rcx), %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0x21]
+ vfmsub213ps (%rcx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 291(%rax,%r14,8), %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x22,0x15,0x20,0xaa,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub213ps 291(%rax,%r14,8), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps (%rcx){1to8}, %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0x21]
+ vfmsub213ps (%rcx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 4064(%rdx), %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0x62,0x7f]
+ vfmsub213ps 4064(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 4096(%rdx), %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0xa2,0x00,0x10,0x00,0x00]
+ vfmsub213ps 4096(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -4096(%rdx), %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0x62,0x80]
+ vfmsub213ps -4096(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -4128(%rdx), %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x20,0xaa,0xa2,0xe0,0xef,0xff,0xff]
+ vfmsub213ps -4128(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 508(%rdx){1to8}, %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0x62,0x7f]
+ vfmsub213ps 508(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 512(%rdx){1to8}, %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0xa2,0x00,0x02,0x00,0x00]
+ vfmsub213ps 512(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -512(%rdx){1to8}, %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0x62,0x80]
+ vfmsub213ps -512(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -516(%rdx){1to8}, %ymm29, %ymm28
+// CHECK: encoding: [0x62,0x62,0x15,0x30,0xaa,0xa2,0xfc,0xfd,0xff,0xff]
+ vfmsub213ps -516(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xa2,0xe5,0x00,0xaa,0xdc]
+ vfmsub213pd %xmm20, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xe5,0x01,0xaa,0xdc]
+ vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1}
+
+// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xe5,0x81,0xaa,0xdc]
+ vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} {z}
+
+// CHECK: vfmsub213pd (%rcx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x19]
+ vfmsub213pd (%rcx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 291(%rax,%r14,8), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xa2,0xe5,0x00,0xaa,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub213pd 291(%rax,%r14,8), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd (%rcx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x19]
+ vfmsub213pd (%rcx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 2032(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x5a,0x7f]
+ vfmsub213pd 2032(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 2048(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x9a,0x00,0x08,0x00,0x00]
+ vfmsub213pd 2048(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -2048(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x5a,0x80]
+ vfmsub213pd -2048(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -2064(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x9a,0xf0,0xf7,0xff,0xff]
+ vfmsub213pd -2064(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 1016(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x5a,0x7f]
+ vfmsub213pd 1016(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 1024(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x9a,0x00,0x04,0x00,0x00]
+ vfmsub213pd 1024(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -1024(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x5a,0x80]
+ vfmsub213pd -1024(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -1032(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x9a,0xf8,0xfb,0xff,0xff]
+ vfmsub213pd -1032(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x02,0xb5,0x20,0xaa,0xdc]
+ vfmsub213pd %ymm28, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4}
+// CHECK: encoding: [0x62,0x02,0xb5,0x24,0xaa,0xdc]
+ vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4}
+
+// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0xb5,0xa4,0xaa,0xdc]
+ vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} {z}
+
+// CHECK: vfmsub213pd (%rcx), %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x19]
+ vfmsub213pd (%rcx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 291(%rax,%r14,8), %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x22,0xb5,0x20,0xaa,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub213pd 291(%rax,%r14,8), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd (%rcx){1to4}, %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x19]
+ vfmsub213pd (%rcx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 4064(%rdx), %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x5a,0x7f]
+ vfmsub213pd 4064(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 4096(%rdx), %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x9a,0x00,0x10,0x00,0x00]
+ vfmsub213pd 4096(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -4096(%rdx), %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x5a,0x80]
+ vfmsub213pd -4096(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -4128(%rdx), %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xaa,0x9a,0xe0,0xef,0xff,0xff]
+ vfmsub213pd -4128(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 1016(%rdx){1to4}, %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x5a,0x7f]
+ vfmsub213pd 1016(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 1024(%rdx){1to4}, %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x9a,0x00,0x04,0x00,0x00]
+ vfmsub213pd 1024(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -1024(%rdx){1to4}, %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x5a,0x80]
+ vfmsub213pd -1024(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -1032(%rdx){1to4}, %ymm25, %ymm27
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xaa,0x9a,0xf8,0xfb,0xff,0xff]
+ vfmsub213pd -1032(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x02,0x65,0x00,0xba,0xe1]
+ vfmsub231ps %xmm25, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1}
+// CHECK: encoding: [0x62,0x02,0x65,0x01,0xba,0xe1]
+ vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1}
+
+// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x65,0x81,0xba,0xe1]
+ vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} {z}
+
+// CHECK: vfmsub231ps (%rcx), %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0x21]
+ vfmsub231ps (%rcx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 291(%rax,%r14,8), %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x22,0x65,0x00,0xba,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub231ps 291(%rax,%r14,8), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps (%rcx){1to4}, %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0x21]
+ vfmsub231ps (%rcx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 2032(%rdx), %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0x62,0x7f]
+ vfmsub231ps 2032(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 2048(%rdx), %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0xa2,0x00,0x08,0x00,0x00]
+ vfmsub231ps 2048(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -2048(%rdx), %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0x62,0x80]
+ vfmsub231ps -2048(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -2064(%rdx), %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x00,0xba,0xa2,0xf0,0xf7,0xff,0xff]
+ vfmsub231ps -2064(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 508(%rdx){1to4}, %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0x62,0x7f]
+ vfmsub231ps 508(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 512(%rdx){1to4}, %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0xa2,0x00,0x02,0x00,0x00]
+ vfmsub231ps 512(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -512(%rdx){1to4}, %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0x62,0x80]
+ vfmsub231ps -512(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -516(%rdx){1to4}, %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x62,0x65,0x10,0xba,0xa2,0xfc,0xfd,0xff,0xff]
+ vfmsub231ps -516(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23
+// CHECK: encoding: [0x62,0x82,0x2d,0x20,0xba,0xfa]
+ vfmsub231ps %ymm26, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1}
+// CHECK: encoding: [0x62,0x82,0x2d,0x21,0xba,0xfa]
+ vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1}
+
+// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x2d,0xa1,0xba,0xfa]
+ vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} {z}
+
+// CHECK: vfmsub231ps (%rcx), %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0x39]
+ vfmsub231ps (%rcx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 291(%rax,%r14,8), %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0xba,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub231ps 291(%rax,%r14,8), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps (%rcx){1to8}, %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0x39]
+ vfmsub231ps (%rcx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 4064(%rdx), %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0x7a,0x7f]
+ vfmsub231ps 4064(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 4096(%rdx), %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0xba,0x00,0x10,0x00,0x00]
+ vfmsub231ps 4096(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -4096(%rdx), %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0x7a,0x80]
+ vfmsub231ps -4096(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -4128(%rdx), %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0xba,0xba,0xe0,0xef,0xff,0xff]
+ vfmsub231ps -4128(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 508(%rdx){1to8}, %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0x7a,0x7f]
+ vfmsub231ps 508(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 512(%rdx){1to8}, %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0xba,0x00,0x02,0x00,0x00]
+ vfmsub231ps 512(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -512(%rdx){1to8}, %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0x7a,0x80]
+ vfmsub231ps -512(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -516(%rdx){1to8}, %ymm26, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0xba,0xba,0xfc,0xfd,0xff,0xff]
+ vfmsub231ps -516(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x22,0xdd,0x00,0xba,0xe7]
+ vfmsub231pd %xmm23, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x22,0xdd,0x04,0xba,0xe7]
+ vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4}
+
+// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0xdd,0x84,0xba,0xe7]
+ vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} {z}
+
+// CHECK: vfmsub231pd (%rcx), %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0x21]
+ vfmsub231pd (%rcx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 291(%rax,%r14,8), %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x22,0xdd,0x00,0xba,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub231pd 291(%rax,%r14,8), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd (%rcx){1to2}, %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0x21]
+ vfmsub231pd (%rcx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 2032(%rdx), %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0x62,0x7f]
+ vfmsub231pd 2032(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 2048(%rdx), %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0xa2,0x00,0x08,0x00,0x00]
+ vfmsub231pd 2048(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -2048(%rdx), %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0x62,0x80]
+ vfmsub231pd -2048(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -2064(%rdx), %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x00,0xba,0xa2,0xf0,0xf7,0xff,0xff]
+ vfmsub231pd -2064(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 1016(%rdx){1to2}, %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0x62,0x7f]
+ vfmsub231pd 1016(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 1024(%rdx){1to2}, %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0xa2,0x00,0x04,0x00,0x00]
+ vfmsub231pd 1024(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -1024(%rdx){1to2}, %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0x62,0x80]
+ vfmsub231pd -1024(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -1032(%rdx){1to2}, %xmm20, %xmm28
+// CHECK: encoding: [0x62,0x62,0xdd,0x10,0xba,0xa2,0xf8,0xfb,0xff,0xff]
+ vfmsub231pd -1032(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xba,0xce]
+ vfmsub231pd %ymm22, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xed,0x22,0xba,0xce]
+ vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2}
+
+// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0xed,0xa2,0xba,0xce]
+ vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} {z}
+
+// CHECK: vfmsub231pd (%rcx), %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x09]
+ vfmsub231pd (%rcx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 291(%rax,%r14,8), %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xba,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsub231pd 291(%rax,%r14,8), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd (%rcx){1to4}, %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x09]
+ vfmsub231pd (%rcx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 4064(%rdx), %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x4a,0x7f]
+ vfmsub231pd 4064(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 4096(%rdx), %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x8a,0x00,0x10,0x00,0x00]
+ vfmsub231pd 4096(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -4096(%rdx), %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x4a,0x80]
+ vfmsub231pd -4096(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -4128(%rdx), %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xba,0x8a,0xe0,0xef,0xff,0xff]
+ vfmsub231pd -4128(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x4a,0x7f]
+ vfmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x8a,0x00,0x04,0x00,0x00]
+ vfmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x4a,0x80]
+ vfmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xba,0x8a,0xf8,0xfb,0xff,0xff]
+ vfmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x22,0x25,0x00,0x96,0xc2]
+ vfmaddsub132ps %xmm18, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2}
+// CHECK: encoding: [0x62,0x22,0x25,0x02,0x96,0xc2]
+ vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2}
+
+// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x25,0x82,0x96,0xc2]
+ vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} {z}
+
+// CHECK: vfmaddsub132ps (%rcx), %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x01]
+ vfmaddsub132ps (%rcx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x22,0x25,0x00,0x96,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub132ps 291(%rax,%r14,8), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps (%rcx){1to4}, %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x01]
+ vfmaddsub132ps (%rcx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 2032(%rdx), %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x42,0x7f]
+ vfmaddsub132ps 2032(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 2048(%rdx), %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x82,0x00,0x08,0x00,0x00]
+ vfmaddsub132ps 2048(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -2048(%rdx), %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x42,0x80]
+ vfmaddsub132ps -2048(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -2064(%rdx), %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x00,0x96,0x82,0xf0,0xf7,0xff,0xff]
+ vfmaddsub132ps -2064(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 508(%rdx){1to4}, %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x42,0x7f]
+ vfmaddsub132ps 508(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 512(%rdx){1to4}, %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x82,0x00,0x02,0x00,0x00]
+ vfmaddsub132ps 512(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -512(%rdx){1to4}, %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x42,0x80]
+ vfmaddsub132ps -512(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -516(%rdx){1to4}, %xmm27, %xmm24
+// CHECK: encoding: [0x62,0x62,0x25,0x10,0x96,0x82,0xfc,0xfd,0xff,0xff]
+ vfmaddsub132ps -516(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21
+// CHECK: encoding: [0x62,0x82,0x5d,0x20,0x96,0xe8]
+ vfmaddsub132ps %ymm24, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5}
+// CHECK: encoding: [0x62,0x82,0x5d,0x25,0x96,0xe8]
+ vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5}
+
+// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0x5d,0xa5,0x96,0xe8]
+ vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} {z}
+
+// CHECK: vfmaddsub132ps (%rcx), %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0x29]
+ vfmaddsub132ps (%rcx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x5d,0x20,0x96,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub132ps 291(%rax,%r14,8), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps (%rcx){1to8}, %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0x29]
+ vfmaddsub132ps (%rcx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 4064(%rdx), %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0x6a,0x7f]
+ vfmaddsub132ps 4064(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 4096(%rdx), %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0xaa,0x00,0x10,0x00,0x00]
+ vfmaddsub132ps 4096(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -4096(%rdx), %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0x6a,0x80]
+ vfmaddsub132ps -4096(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -4128(%rdx), %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0x96,0xaa,0xe0,0xef,0xff,0xff]
+ vfmaddsub132ps -4128(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 508(%rdx){1to8}, %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0x6a,0x7f]
+ vfmaddsub132ps 508(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 512(%rdx){1to8}, %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0xaa,0x00,0x02,0x00,0x00]
+ vfmaddsub132ps 512(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -512(%rdx){1to8}, %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0x6a,0x80]
+ vfmaddsub132ps -512(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -516(%rdx){1to8}, %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0x96,0xaa,0xfc,0xfd,0xff,0xff]
+ vfmaddsub132ps -516(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x96,0xd4]
+ vfmaddsub132pd %xmm20, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6}
+// CHECK: encoding: [0x62,0x22,0xa5,0x06,0x96,0xd4]
+ vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6}
+
+// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0xa5,0x86,0x96,0xd4]
+ vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} {z}
+
+// CHECK: vfmaddsub132pd (%rcx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x11]
+ vfmaddsub132pd (%rcx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x96,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub132pd 291(%rax,%r14,8), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd (%rcx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x11]
+ vfmaddsub132pd (%rcx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 2032(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x52,0x7f]
+ vfmaddsub132pd 2032(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 2048(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x92,0x00,0x08,0x00,0x00]
+ vfmaddsub132pd 2048(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -2048(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x52,0x80]
+ vfmaddsub132pd -2048(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -2064(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x96,0x92,0xf0,0xf7,0xff,0xff]
+ vfmaddsub132pd -2064(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 1016(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x52,0x7f]
+ vfmaddsub132pd 1016(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 1024(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x92,0x00,0x04,0x00,0x00]
+ vfmaddsub132pd 1024(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -1024(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x52,0x80]
+ vfmaddsub132pd -1024(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -1032(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x96,0x92,0xf8,0xfb,0xff,0xff]
+ vfmaddsub132pd -1032(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x02,0xed,0x20,0x96,0xf4]
+ vfmaddsub132pd %ymm28, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2}
+// CHECK: encoding: [0x62,0x02,0xed,0x22,0x96,0xf4]
+ vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2}
+
+// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x02,0xed,0xa2,0x96,0xf4]
+ vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} {z}
+
+// CHECK: vfmaddsub132pd (%rcx), %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0x31]
+ vfmaddsub132pd (%rcx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x22,0xed,0x20,0x96,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub132pd 291(%rax,%r14,8), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd (%rcx){1to4}, %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0x31]
+ vfmaddsub132pd (%rcx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 4064(%rdx), %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0x72,0x7f]
+ vfmaddsub132pd 4064(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 4096(%rdx), %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0xb2,0x00,0x10,0x00,0x00]
+ vfmaddsub132pd 4096(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -4096(%rdx), %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0x72,0x80]
+ vfmaddsub132pd -4096(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -4128(%rdx), %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x20,0x96,0xb2,0xe0,0xef,0xff,0xff]
+ vfmaddsub132pd -4128(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 1016(%rdx){1to4}, %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0x72,0x7f]
+ vfmaddsub132pd 1016(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 1024(%rdx){1to4}, %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0xb2,0x00,0x04,0x00,0x00]
+ vfmaddsub132pd 1024(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -1024(%rdx){1to4}, %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0x72,0x80]
+ vfmaddsub132pd -1024(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -1032(%rdx){1to4}, %ymm18, %ymm30
+// CHECK: encoding: [0x62,0x62,0xed,0x30,0x96,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmaddsub132pd -1032(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xa6,0xe1]
+ vfmaddsub213ps %xmm17, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x15,0x01,0xa6,0xe1]
+ vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1}
+
+// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x15,0x81,0xa6,0xe1]
+ vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} {z}
+
+// CHECK: vfmaddsub213ps (%rcx), %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0x21]
+ vfmaddsub213ps (%rcx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xa6,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub213ps 291(%rax,%r14,8), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps (%rcx){1to4}, %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0x21]
+ vfmaddsub213ps (%rcx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 2032(%rdx), %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0x62,0x7f]
+ vfmaddsub213ps 2032(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 2048(%rdx), %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0xa2,0x00,0x08,0x00,0x00]
+ vfmaddsub213ps 2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -2048(%rdx), %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0x62,0x80]
+ vfmaddsub213ps -2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -2064(%rdx), %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xa6,0xa2,0xf0,0xf7,0xff,0xff]
+ vfmaddsub213ps -2064(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 508(%rdx){1to4}, %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0x62,0x7f]
+ vfmaddsub213ps 508(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 512(%rdx){1to4}, %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0xa2,0x00,0x02,0x00,0x00]
+ vfmaddsub213ps 512(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -512(%rdx){1to4}, %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0x62,0x80]
+ vfmaddsub213ps -512(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -516(%rdx){1to4}, %xmm29, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xa6,0xa2,0xfc,0xfd,0xff,0xff]
+ vfmaddsub213ps -516(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x22,0x25,0x20,0xa6,0xcf]
+ vfmaddsub213ps %ymm23, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4}
+// CHECK: encoding: [0x62,0x22,0x25,0x24,0xa6,0xcf]
+ vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4}
+
+// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x25,0xa4,0xa6,0xcf]
+ vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} {z}
+
+// CHECK: vfmaddsub213ps (%rcx), %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x09]
+ vfmaddsub213ps (%rcx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x22,0x25,0x20,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub213ps 291(%rax,%r14,8), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps (%rcx){1to8}, %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x09]
+ vfmaddsub213ps (%rcx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 4064(%rdx), %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x4a,0x7f]
+ vfmaddsub213ps 4064(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 4096(%rdx), %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x8a,0x00,0x10,0x00,0x00]
+ vfmaddsub213ps 4096(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -4096(%rdx), %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x4a,0x80]
+ vfmaddsub213ps -4096(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -4128(%rdx), %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x20,0xa6,0x8a,0xe0,0xef,0xff,0xff]
+ vfmaddsub213ps -4128(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 508(%rdx){1to8}, %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x4a,0x7f]
+ vfmaddsub213ps 508(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 512(%rdx){1to8}, %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x8a,0x00,0x02,0x00,0x00]
+ vfmaddsub213ps 512(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -512(%rdx){1to8}, %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x4a,0x80]
+ vfmaddsub213ps -512(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -516(%rdx){1to8}, %ymm27, %ymm25
+// CHECK: encoding: [0x62,0x62,0x25,0x30,0xa6,0x8a,0xfc,0xfd,0xff,0xff]
+ vfmaddsub213ps -516(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x22,0x95,0x00,0xa6,0xcb]
+ vfmaddsub213pd %xmm19, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7}
+// CHECK: encoding: [0x62,0x22,0x95,0x07,0xa6,0xcb]
+ vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7}
+
+// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x95,0x87,0xa6,0xcb]
+ vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} {z}
+
+// CHECK: vfmaddsub213pd (%rcx), %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x09]
+ vfmaddsub213pd (%rcx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x22,0x95,0x00,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub213pd 291(%rax,%r14,8), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd (%rcx){1to2}, %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x09]
+ vfmaddsub213pd (%rcx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 2032(%rdx), %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x4a,0x7f]
+ vfmaddsub213pd 2032(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 2048(%rdx), %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x8a,0x00,0x08,0x00,0x00]
+ vfmaddsub213pd 2048(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -2048(%rdx), %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x4a,0x80]
+ vfmaddsub213pd -2048(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -2064(%rdx), %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x00,0xa6,0x8a,0xf0,0xf7,0xff,0xff]
+ vfmaddsub213pd -2064(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 1016(%rdx){1to2}, %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x4a,0x7f]
+ vfmaddsub213pd 1016(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 1024(%rdx){1to2}, %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x8a,0x00,0x04,0x00,0x00]
+ vfmaddsub213pd 1024(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -1024(%rdx){1to2}, %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x4a,0x80]
+ vfmaddsub213pd -1024(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -1032(%rdx){1to2}, %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x62,0x95,0x10,0xa6,0x8a,0xf8,0xfb,0xff,0xff]
+ vfmaddsub213pd -1032(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xf5,0x20,0xa6,0xec]
+ vfmaddsub213pd %ymm20, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4}
+// CHECK: encoding: [0x62,0xa2,0xf5,0x24,0xa6,0xec]
+ vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4}
+
+// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0xf5,0xa4,0xa6,0xec]
+ vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} {z}
+
+// CHECK: vfmaddsub213pd (%rcx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x29]
+ vfmaddsub213pd (%rcx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xf5,0x20,0xa6,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub213pd 291(%rax,%r14,8), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd (%rcx){1to4}, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x29]
+ vfmaddsub213pd (%rcx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 4064(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x6a,0x7f]
+ vfmaddsub213pd 4064(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0xaa,0x00,0x10,0x00,0x00]
+ vfmaddsub213pd 4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x6a,0x80]
+ vfmaddsub213pd -4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -4128(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x20,0xa6,0xaa,0xe0,0xef,0xff,0xff]
+ vfmaddsub213pd -4128(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 1016(%rdx){1to4}, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x6a,0x7f]
+ vfmaddsub213pd 1016(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 1024(%rdx){1to4}, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0xaa,0x00,0x04,0x00,0x00]
+ vfmaddsub213pd 1024(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -1024(%rdx){1to4}, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x6a,0x80]
+ vfmaddsub213pd -1024(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -1032(%rdx){1to4}, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xf5,0x30,0xa6,0xaa,0xf8,0xfb,0xff,0xff]
+ vfmaddsub213pd -1032(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xb6,0xdc]
+ vfmaddsub231ps %xmm20, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x15,0x06,0xb6,0xdc]
+ vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6}
+
+// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x15,0x86,0xb6,0xdc]
+ vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} {z}
+
+// CHECK: vfmaddsub231ps (%rcx), %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x19]
+ vfmaddsub231ps (%rcx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x15,0x00,0xb6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub231ps 291(%rax,%r14,8), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps (%rcx){1to4}, %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x19]
+ vfmaddsub231ps (%rcx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 2032(%rdx), %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x5a,0x7f]
+ vfmaddsub231ps 2032(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 2048(%rdx), %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x9a,0x00,0x08,0x00,0x00]
+ vfmaddsub231ps 2048(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -2048(%rdx), %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x5a,0x80]
+ vfmaddsub231ps -2048(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -2064(%rdx), %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x00,0xb6,0x9a,0xf0,0xf7,0xff,0xff]
+ vfmaddsub231ps -2064(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 508(%rdx){1to4}, %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x5a,0x7f]
+ vfmaddsub231ps 508(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 512(%rdx){1to4}, %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x9a,0x00,0x02,0x00,0x00]
+ vfmaddsub231ps 512(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -512(%rdx){1to4}, %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x5a,0x80]
+ vfmaddsub231ps -512(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -516(%rdx){1to4}, %xmm29, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x15,0x10,0xb6,0x9a,0xfc,0xfd,0xff,0xff]
+ vfmaddsub231ps -516(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0xb6,0xd9]
+ vfmaddsub231ps %ymm17, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x3d,0x21,0xb6,0xd9]
+ vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1}
+
+// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x3d,0xa1,0xb6,0xd9]
+ vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} {z}
+
+// CHECK: vfmaddsub231ps (%rcx), %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x19]
+ vfmaddsub231ps (%rcx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0xb6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub231ps 291(%rax,%r14,8), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps (%rcx){1to8}, %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x19]
+ vfmaddsub231ps (%rcx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 4064(%rdx), %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x5a,0x7f]
+ vfmaddsub231ps 4064(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 4096(%rdx), %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x9a,0x00,0x10,0x00,0x00]
+ vfmaddsub231ps 4096(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -4096(%rdx), %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x5a,0x80]
+ vfmaddsub231ps -4096(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -4128(%rdx), %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x9a,0xe0,0xef,0xff,0xff]
+ vfmaddsub231ps -4128(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 508(%rdx){1to8}, %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x5a,0x7f]
+ vfmaddsub231ps 508(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 512(%rdx){1to8}, %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x9a,0x00,0x02,0x00,0x00]
+ vfmaddsub231ps 512(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -512(%rdx){1to8}, %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x5a,0x80]
+ vfmaddsub231ps -512(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -516(%rdx){1to8}, %ymm24, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x9a,0xfc,0xfd,0xff,0xff]
+ vfmaddsub231ps -516(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23
+// CHECK: encoding: [0x62,0x82,0xad,0x00,0xb6,0xfc]
+ vfmaddsub231pd %xmm28, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7}
+// CHECK: encoding: [0x62,0x82,0xad,0x07,0xb6,0xfc]
+ vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7}
+
+// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xad,0x87,0xb6,0xfc]
+ vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} {z}
+
+// CHECK: vfmaddsub231pd (%rcx), %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0x39]
+ vfmaddsub231pd (%rcx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xa2,0xad,0x00,0xb6,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub231pd 291(%rax,%r14,8), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd (%rcx){1to2}, %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0x39]
+ vfmaddsub231pd (%rcx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 2032(%rdx), %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0x7a,0x7f]
+ vfmaddsub231pd 2032(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 2048(%rdx), %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0xba,0x00,0x08,0x00,0x00]
+ vfmaddsub231pd 2048(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -2048(%rdx), %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0x7a,0x80]
+ vfmaddsub231pd -2048(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -2064(%rdx), %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0xb6,0xba,0xf0,0xf7,0xff,0xff]
+ vfmaddsub231pd -2064(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 1016(%rdx){1to2}, %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0x7a,0x7f]
+ vfmaddsub231pd 1016(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 1024(%rdx){1to2}, %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0xba,0x00,0x04,0x00,0x00]
+ vfmaddsub231pd 1024(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -1024(%rdx){1to2}, %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0x7a,0x80]
+ vfmaddsub231pd -1024(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -1032(%rdx){1to2}, %xmm26, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0xb6,0xba,0xf8,0xfb,0xff,0xff]
+ vfmaddsub231pd -1032(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x02,0xb5,0x20,0xb6,0xf3]
+ vfmaddsub231pd %ymm27, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5}
+// CHECK: encoding: [0x62,0x02,0xb5,0x25,0xb6,0xf3]
+ vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5}
+
+// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0xb5,0xa5,0xb6,0xf3]
+ vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} {z}
+
+// CHECK: vfmaddsub231pd (%rcx), %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0x31]
+ vfmaddsub231pd (%rcx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x22,0xb5,0x20,0xb6,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmaddsub231pd 291(%rax,%r14,8), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd (%rcx){1to4}, %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0x31]
+ vfmaddsub231pd (%rcx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 4064(%rdx), %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0x72,0x7f]
+ vfmaddsub231pd 4064(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 4096(%rdx), %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0xb2,0x00,0x10,0x00,0x00]
+ vfmaddsub231pd 4096(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -4096(%rdx), %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0x72,0x80]
+ vfmaddsub231pd -4096(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -4128(%rdx), %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x20,0xb6,0xb2,0xe0,0xef,0xff,0xff]
+ vfmaddsub231pd -4128(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 1016(%rdx){1to4}, %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0x72,0x7f]
+ vfmaddsub231pd 1016(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 1024(%rdx){1to4}, %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0xb2,0x00,0x04,0x00,0x00]
+ vfmaddsub231pd 1024(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -1024(%rdx){1to4}, %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0x72,0x80]
+ vfmaddsub231pd -1024(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -1032(%rdx){1to4}, %ymm25, %ymm30
+// CHECK: encoding: [0x62,0x62,0xb5,0x30,0xb6,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmaddsub231pd -1032(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x22,0x45,0x00,0x97,0xc4]
+ vfmsubadd132ps %xmm20, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5}
+// CHECK: encoding: [0x62,0x22,0x45,0x05,0x97,0xc4]
+ vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5}
+
+// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x45,0x85,0x97,0xc4]
+ vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} {z}
+
+// CHECK: vfmsubadd132ps (%rcx), %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x01]
+ vfmsubadd132ps (%rcx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x22,0x45,0x00,0x97,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd132ps 291(%rax,%r14,8), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps (%rcx){1to4}, %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x01]
+ vfmsubadd132ps (%rcx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 2032(%rdx), %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x42,0x7f]
+ vfmsubadd132ps 2032(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 2048(%rdx), %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x82,0x00,0x08,0x00,0x00]
+ vfmsubadd132ps 2048(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -2048(%rdx), %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x42,0x80]
+ vfmsubadd132ps -2048(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -2064(%rdx), %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x00,0x97,0x82,0xf0,0xf7,0xff,0xff]
+ vfmsubadd132ps -2064(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 508(%rdx){1to4}, %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x42,0x7f]
+ vfmsubadd132ps 508(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 512(%rdx){1to4}, %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x82,0x00,0x02,0x00,0x00]
+ vfmsubadd132ps 512(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -512(%rdx){1to4}, %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x42,0x80]
+ vfmsubadd132ps -512(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -516(%rdx){1to4}, %xmm23, %xmm24
+// CHECK: encoding: [0x62,0x62,0x45,0x10,0x97,0x82,0xfc,0xfd,0xff,0xff]
+ vfmsubadd132ps -516(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xa2,0x1d,0x20,0x97,0xff]
+ vfmsubadd132ps %ymm23, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x1d,0x21,0x97,0xff]
+ vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1}
+
+// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x1d,0xa1,0x97,0xff]
+ vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} {z}
+
+// CHECK: vfmsubadd132ps (%rcx), %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0x39]
+ vfmsubadd132ps (%rcx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xa2,0x1d,0x20,0x97,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd132ps 291(%rax,%r14,8), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps (%rcx){1to8}, %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0x39]
+ vfmsubadd132ps (%rcx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 4064(%rdx), %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0x7a,0x7f]
+ vfmsubadd132ps 4064(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 4096(%rdx), %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0xba,0x00,0x10,0x00,0x00]
+ vfmsubadd132ps 4096(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -4096(%rdx), %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0x7a,0x80]
+ vfmsubadd132ps -4096(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -4128(%rdx), %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x20,0x97,0xba,0xe0,0xef,0xff,0xff]
+ vfmsubadd132ps -4128(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 508(%rdx){1to8}, %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0x7a,0x7f]
+ vfmsubadd132ps 508(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 512(%rdx){1to8}, %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0xba,0x00,0x02,0x00,0x00]
+ vfmsubadd132ps 512(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -512(%rdx){1to8}, %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0x7a,0x80]
+ vfmsubadd132ps -512(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -516(%rdx){1to8}, %ymm28, %ymm23
+// CHECK: encoding: [0x62,0xe2,0x1d,0x30,0x97,0xba,0xfc,0xfd,0xff,0xff]
+ vfmsubadd132ps -516(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0x82,0xad,0x00,0x97,0xf0]
+ vfmsubadd132pd %xmm24, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3}
+// CHECK: encoding: [0x62,0x82,0xad,0x03,0x97,0xf0]
+ vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3}
+
+// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0xad,0x83,0x97,0xf0]
+ vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} {z}
+
+// CHECK: vfmsubadd132pd (%rcx), %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0x31]
+ vfmsubadd132pd (%rcx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xa2,0xad,0x00,0x97,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd132pd 291(%rax,%r14,8), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd (%rcx){1to2}, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0x31]
+ vfmsubadd132pd (%rcx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 2032(%rdx), %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0x72,0x7f]
+ vfmsubadd132pd 2032(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 2048(%rdx), %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0xb2,0x00,0x08,0x00,0x00]
+ vfmsubadd132pd 2048(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -2048(%rdx), %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0x72,0x80]
+ vfmsubadd132pd -2048(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -2064(%rdx), %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x00,0x97,0xb2,0xf0,0xf7,0xff,0xff]
+ vfmsubadd132pd -2064(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 1016(%rdx){1to2}, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0x72,0x7f]
+ vfmsubadd132pd 1016(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 1024(%rdx){1to2}, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0xb2,0x00,0x04,0x00,0x00]
+ vfmsubadd132pd 1024(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -1024(%rdx){1to2}, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0x72,0x80]
+ vfmsubadd132pd -1024(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -1032(%rdx){1to2}, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xad,0x10,0x97,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmsubadd132pd -1032(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x22,0xf5,0x20,0x97,0xc5]
+ vfmsubadd132pd %ymm21, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7}
+// CHECK: encoding: [0x62,0x22,0xf5,0x27,0x97,0xc5]
+ vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7}
+
+// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0xf5,0xa7,0x97,0xc5]
+ vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} {z}
+
+// CHECK: vfmsubadd132pd (%rcx), %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x01]
+ vfmsubadd132pd (%rcx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x22,0xf5,0x20,0x97,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd132pd 291(%rax,%r14,8), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd (%rcx){1to4}, %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x01]
+ vfmsubadd132pd (%rcx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 4064(%rdx), %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x42,0x7f]
+ vfmsubadd132pd 4064(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 4096(%rdx), %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x82,0x00,0x10,0x00,0x00]
+ vfmsubadd132pd 4096(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -4096(%rdx), %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x42,0x80]
+ vfmsubadd132pd -4096(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -4128(%rdx), %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x20,0x97,0x82,0xe0,0xef,0xff,0xff]
+ vfmsubadd132pd -4128(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 1016(%rdx){1to4}, %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x42,0x7f]
+ vfmsubadd132pd 1016(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 1024(%rdx){1to4}, %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x82,0x00,0x04,0x00,0x00]
+ vfmsubadd132pd 1024(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -1024(%rdx){1to4}, %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x42,0x80]
+ vfmsubadd132pd -1024(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -1032(%rdx){1to4}, %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x62,0xf5,0x30,0x97,0x82,0xf8,0xfb,0xff,0xff]
+ vfmsubadd132pd -1032(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x5d,0x00,0xa7,0xd9]
+ vfmsubadd213ps %xmm17, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x5d,0x05,0xa7,0xd9]
+ vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5}
+
+// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x5d,0x85,0xa7,0xd9]
+ vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} {z}
+
+// CHECK: vfmsubadd213ps (%rcx), %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x19]
+ vfmsubadd213ps (%rcx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x5d,0x00,0xa7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd213ps 291(%rax,%r14,8), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps (%rcx){1to4}, %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x19]
+ vfmsubadd213ps (%rcx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 2032(%rdx), %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x5a,0x7f]
+ vfmsubadd213ps 2032(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 2048(%rdx), %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x9a,0x00,0x08,0x00,0x00]
+ vfmsubadd213ps 2048(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -2048(%rdx), %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x5a,0x80]
+ vfmsubadd213ps -2048(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -2064(%rdx), %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x9a,0xf0,0xf7,0xff,0xff]
+ vfmsubadd213ps -2064(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 508(%rdx){1to4}, %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x5a,0x7f]
+ vfmsubadd213ps 508(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 512(%rdx){1to4}, %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x9a,0x00,0x02,0x00,0x00]
+ vfmsubadd213ps 512(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -512(%rdx){1to4}, %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x5a,0x80]
+ vfmsubadd213ps -512(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -516(%rdx){1to4}, %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x9a,0xfc,0xfd,0xff,0xff]
+ vfmsubadd213ps -516(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x22,0x75,0x20,0xa7,0xd7]
+ vfmsubadd213ps %ymm23, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7}
+// CHECK: encoding: [0x62,0x22,0x75,0x27,0xa7,0xd7]
+ vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7}
+
+// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x75,0xa7,0xa7,0xd7]
+ vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} {z}
+
+// CHECK: vfmsubadd213ps (%rcx), %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x11]
+ vfmsubadd213ps (%rcx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x22,0x75,0x20,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd213ps 291(%rax,%r14,8), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps (%rcx){1to8}, %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x11]
+ vfmsubadd213ps (%rcx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 4064(%rdx), %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x52,0x7f]
+ vfmsubadd213ps 4064(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 4096(%rdx), %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x92,0x00,0x10,0x00,0x00]
+ vfmsubadd213ps 4096(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -4096(%rdx), %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x52,0x80]
+ vfmsubadd213ps -4096(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -4128(%rdx), %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x20,0xa7,0x92,0xe0,0xef,0xff,0xff]
+ vfmsubadd213ps -4128(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 508(%rdx){1to8}, %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x52,0x7f]
+ vfmsubadd213ps 508(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 512(%rdx){1to8}, %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x92,0x00,0x02,0x00,0x00]
+ vfmsubadd213ps 512(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -512(%rdx){1to8}, %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x52,0x80]
+ vfmsubadd213ps -512(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -516(%rdx){1to8}, %ymm17, %ymm26
+// CHECK: encoding: [0x62,0x62,0x75,0x30,0xa7,0x92,0xfc,0xfd,0xff,0xff]
+ vfmsubadd213ps -516(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0x82,0xd5,0x00,0xa7,0xd4]
+ vfmsubadd213pd %xmm28, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4}
+// CHECK: encoding: [0x62,0x82,0xd5,0x04,0xa7,0xd4]
+ vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4}
+
+// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0xd5,0x84,0xa7,0xd4]
+ vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} {z}
+
+// CHECK: vfmsubadd213pd (%rcx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x11]
+ vfmsubadd213pd (%rcx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xa2,0xd5,0x00,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd213pd 291(%rax,%r14,8), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd (%rcx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x11]
+ vfmsubadd213pd (%rcx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 2032(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x52,0x7f]
+ vfmsubadd213pd 2032(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x92,0x00,0x08,0x00,0x00]
+ vfmsubadd213pd 2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x52,0x80]
+ vfmsubadd213pd -2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -2064(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x92,0xf0,0xf7,0xff,0xff]
+ vfmsubadd213pd -2064(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 1016(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x52,0x7f]
+ vfmsubadd213pd 1016(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x92,0x00,0x04,0x00,0x00]
+ vfmsubadd213pd 1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x52,0x80]
+ vfmsubadd213pd -1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -1032(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x92,0xf8,0xfb,0xff,0xff]
+ vfmsubadd213pd -1032(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x02,0xdd,0x20,0xa7,0xd9]
+ vfmsubadd213pd %ymm25, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7}
+// CHECK: encoding: [0x62,0x02,0xdd,0x27,0xa7,0xd9]
+ vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7}
+
+// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0xdd,0xa7,0xa7,0xd9]
+ vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} {z}
+
+// CHECK: vfmsubadd213pd (%rcx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x19]
+ vfmsubadd213pd (%rcx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x22,0xdd,0x20,0xa7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd213pd 291(%rax,%r14,8), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd (%rcx){1to4}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x19]
+ vfmsubadd213pd (%rcx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 4064(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x5a,0x7f]
+ vfmsubadd213pd 4064(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 4096(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x9a,0x00,0x10,0x00,0x00]
+ vfmsubadd213pd 4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -4096(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x5a,0x80]
+ vfmsubadd213pd -4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -4128(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x20,0xa7,0x9a,0xe0,0xef,0xff,0xff]
+ vfmsubadd213pd -4128(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 1016(%rdx){1to4}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x5a,0x7f]
+ vfmsubadd213pd 1016(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 1024(%rdx){1to4}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x9a,0x00,0x04,0x00,0x00]
+ vfmsubadd213pd 1024(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -1024(%rdx){1to4}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x5a,0x80]
+ vfmsubadd213pd -1024(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -1032(%rdx){1to4}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x62,0xdd,0x30,0xa7,0x9a,0xf8,0xfb,0xff,0xff]
+ vfmsubadd213pd -1032(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x35,0x00,0xb7,0xef]
+ vfmsubadd231ps %xmm23, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x35,0x04,0xb7,0xef]
+ vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4}
+
+// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x35,0x84,0xb7,0xef]
+ vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} {z}
+
+// CHECK: vfmsubadd231ps (%rcx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0x29]
+ vfmsubadd231ps (%rcx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x35,0x00,0xb7,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd231ps 291(%rax,%r14,8), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps (%rcx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0x29]
+ vfmsubadd231ps (%rcx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 2032(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0x6a,0x7f]
+ vfmsubadd231ps 2032(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 2048(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0xaa,0x00,0x08,0x00,0x00]
+ vfmsubadd231ps 2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -2048(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0x6a,0x80]
+ vfmsubadd231ps -2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -2064(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0xb7,0xaa,0xf0,0xf7,0xff,0xff]
+ vfmsubadd231ps -2064(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 508(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0x6a,0x7f]
+ vfmsubadd231ps 508(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0xaa,0x00,0x02,0x00,0x00]
+ vfmsubadd231ps 512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0x6a,0x80]
+ vfmsubadd231ps -512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -516(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0xb7,0xaa,0xfc,0xfd,0xff,0xff]
+ vfmsubadd231ps -516(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x22,0x45,0x20,0xb7,0xdc]
+ vfmsubadd231ps %ymm20, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3}
+// CHECK: encoding: [0x62,0x22,0x45,0x23,0xb7,0xdc]
+ vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3}
+
+// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0x45,0xa3,0xb7,0xdc]
+ vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} {z}
+
+// CHECK: vfmsubadd231ps (%rcx), %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x19]
+ vfmsubadd231ps (%rcx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x22,0x45,0x20,0xb7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd231ps 291(%rax,%r14,8), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps (%rcx){1to8}, %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x19]
+ vfmsubadd231ps (%rcx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 4064(%rdx), %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x5a,0x7f]
+ vfmsubadd231ps 4064(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 4096(%rdx), %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x9a,0x00,0x10,0x00,0x00]
+ vfmsubadd231ps 4096(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -4096(%rdx), %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x5a,0x80]
+ vfmsubadd231ps -4096(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -4128(%rdx), %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x20,0xb7,0x9a,0xe0,0xef,0xff,0xff]
+ vfmsubadd231ps -4128(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 508(%rdx){1to8}, %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x5a,0x7f]
+ vfmsubadd231ps 508(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 512(%rdx){1to8}, %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x9a,0x00,0x02,0x00,0x00]
+ vfmsubadd231ps 512(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -512(%rdx){1to8}, %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x5a,0x80]
+ vfmsubadd231ps -512(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -516(%rdx){1to8}, %ymm23, %ymm27
+// CHECK: encoding: [0x62,0x62,0x45,0x30,0xb7,0x9a,0xfc,0xfd,0xff,0xff]
+ vfmsubadd231ps -516(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0x82,0xbd,0x00,0xb7,0xe4]
+ vfmsubadd231pd %xmm28, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3}
+// CHECK: encoding: [0x62,0x82,0xbd,0x03,0xb7,0xe4]
+ vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3}
+
+// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0xbd,0x83,0xb7,0xe4]
+ vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} {z}
+
+// CHECK: vfmsubadd231pd (%rcx), %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x21]
+ vfmsubadd231pd (%rcx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xa2,0xbd,0x00,0xb7,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd231pd 291(%rax,%r14,8), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd (%rcx){1to2}, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x21]
+ vfmsubadd231pd (%rcx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 2032(%rdx), %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x62,0x7f]
+ vfmsubadd231pd 2032(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 2048(%rdx), %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0xa2,0x00,0x08,0x00,0x00]
+ vfmsubadd231pd 2048(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -2048(%rdx), %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x62,0x80]
+ vfmsubadd231pd -2048(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -2064(%rdx), %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x00,0xb7,0xa2,0xf0,0xf7,0xff,0xff]
+ vfmsubadd231pd -2064(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 1016(%rdx){1to2}, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x62,0x7f]
+ vfmsubadd231pd 1016(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 1024(%rdx){1to2}, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0xa2,0x00,0x04,0x00,0x00]
+ vfmsubadd231pd 1024(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -1024(%rdx){1to2}, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x62,0x80]
+ vfmsubadd231pd -1024(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -1032(%rdx){1to2}, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xbd,0x10,0xb7,0xa2,0xf8,0xfb,0xff,0xff]
+ vfmsubadd231pd -1032(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x22,0x95,0x20,0xb7,0xf5]
+ vfmsubadd231pd %ymm21, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x95,0x27,0xb7,0xf5]
+ vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7}
+
+// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x95,0xa7,0xb7,0xf5]
+ vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} {z}
+
+// CHECK: vfmsubadd231pd (%rcx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0x31]
+ vfmsubadd231pd (%rcx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x22,0x95,0x20,0xb7,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfmsubadd231pd 291(%rax,%r14,8), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd (%rcx){1to4}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0x31]
+ vfmsubadd231pd (%rcx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 4064(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0x72,0x7f]
+ vfmsubadd231pd 4064(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 4096(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0xb2,0x00,0x10,0x00,0x00]
+ vfmsubadd231pd 4096(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -4096(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0x72,0x80]
+ vfmsubadd231pd -4096(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -4128(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x20,0xb7,0xb2,0xe0,0xef,0xff,0xff]
+ vfmsubadd231pd -4128(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 1016(%rdx){1to4}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0x72,0x7f]
+ vfmsubadd231pd 1016(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 1024(%rdx){1to4}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0xb2,0x00,0x04,0x00,0x00]
+ vfmsubadd231pd 1024(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -1024(%rdx){1to4}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0x72,0x80]
+ vfmsubadd231pd -1024(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -1032(%rdx){1to4}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x95,0x30,0xb7,0xb2,0xf8,0xfb,0xff,0xff]
+ vfmsubadd231pd -1032(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x2d,0x00,0x9c,0xe2]
+ vfnmadd132ps %xmm18, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x2d,0x07,0x9c,0xe2]
+ vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7}
+
+// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x2d,0x87,0x9c,0xe2]
+ vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} {z}
+
+// CHECK: vfnmadd132ps (%rcx), %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x21]
+ vfnmadd132ps (%rcx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 291(%rax,%r14,8), %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x2d,0x00,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd132ps 291(%rax,%r14,8), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps (%rcx){1to4}, %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x21]
+ vfnmadd132ps (%rcx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 2032(%rdx), %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x62,0x7f]
+ vfnmadd132ps 2032(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 2048(%rdx), %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0xa2,0x00,0x08,0x00,0x00]
+ vfnmadd132ps 2048(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -2048(%rdx), %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x62,0x80]
+ vfnmadd132ps -2048(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -2064(%rdx), %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0x9c,0xa2,0xf0,0xf7,0xff,0xff]
+ vfnmadd132ps -2064(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 508(%rdx){1to4}, %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x62,0x7f]
+ vfnmadd132ps 508(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 512(%rdx){1to4}, %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0xa2,0x00,0x02,0x00,0x00]
+ vfnmadd132ps 512(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -512(%rdx){1to4}, %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x62,0x80]
+ vfnmadd132ps -512(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -516(%rdx){1to4}, %xmm26, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0x9c,0xa2,0xfc,0xfd,0xff,0xff]
+ vfnmadd132ps -516(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xa2,0x55,0x20,0x9c,0xe2]
+ vfnmadd132ps %ymm18, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x55,0x27,0x9c,0xe2]
+ vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7}
+
+// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x55,0xa7,0x9c,0xe2]
+ vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} {z}
+
+// CHECK: vfnmadd132ps (%rcx), %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0x21]
+ vfnmadd132ps (%rcx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 291(%rax,%r14,8), %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xa2,0x55,0x20,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd132ps 291(%rax,%r14,8), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps (%rcx){1to8}, %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0x21]
+ vfnmadd132ps (%rcx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 4064(%rdx), %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0x62,0x7f]
+ vfnmadd132ps 4064(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 4096(%rdx), %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0xa2,0x00,0x10,0x00,0x00]
+ vfnmadd132ps 4096(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -4096(%rdx), %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0x62,0x80]
+ vfnmadd132ps -4096(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -4128(%rdx), %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x20,0x9c,0xa2,0xe0,0xef,0xff,0xff]
+ vfnmadd132ps -4128(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 508(%rdx){1to8}, %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0x62,0x7f]
+ vfnmadd132ps 508(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 512(%rdx){1to8}, %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0xa2,0x00,0x02,0x00,0x00]
+ vfnmadd132ps 512(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -512(%rdx){1to8}, %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0x62,0x80]
+ vfnmadd132ps -512(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -516(%rdx){1to8}, %ymm21, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x55,0x30,0x9c,0xa2,0xfc,0xfd,0xff,0xff]
+ vfnmadd132ps -516(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x22,0xd5,0x00,0x9c,0xd2]
+ vfnmadd132pd %xmm18, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6}
+// CHECK: encoding: [0x62,0x22,0xd5,0x06,0x9c,0xd2]
+ vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6}
+
+// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0xd5,0x86,0x9c,0xd2]
+ vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} {z}
+
+// CHECK: vfnmadd132pd (%rcx), %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x11]
+ vfnmadd132pd (%rcx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 291(%rax,%r14,8), %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x22,0xd5,0x00,0x9c,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd132pd 291(%rax,%r14,8), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd (%rcx){1to2}, %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x11]
+ vfnmadd132pd (%rcx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 2032(%rdx), %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x52,0x7f]
+ vfnmadd132pd 2032(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 2048(%rdx), %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x92,0x00,0x08,0x00,0x00]
+ vfnmadd132pd 2048(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -2048(%rdx), %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x52,0x80]
+ vfnmadd132pd -2048(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -2064(%rdx), %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x00,0x9c,0x92,0xf0,0xf7,0xff,0xff]
+ vfnmadd132pd -2064(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x52,0x7f]
+ vfnmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x92,0x00,0x04,0x00,0x00]
+ vfnmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x52,0x80]
+ vfnmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm26
+// CHECK: encoding: [0x62,0x62,0xd5,0x10,0x9c,0x92,0xf8,0xfb,0xff,0xff]
+ vfnmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x22,0xcd,0x20,0x9c,0xca]
+ vfnmadd132pd %ymm18, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4}
+// CHECK: encoding: [0x62,0x22,0xcd,0x24,0x9c,0xca]
+ vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4}
+
+// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0xcd,0xa4,0x9c,0xca]
+ vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} {z}
+
+// CHECK: vfnmadd132pd (%rcx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x09]
+ vfnmadd132pd (%rcx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 291(%rax,%r14,8), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x22,0xcd,0x20,0x9c,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd132pd 291(%rax,%r14,8), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd (%rcx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x09]
+ vfnmadd132pd (%rcx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 4064(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x4a,0x7f]
+ vfnmadd132pd 4064(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 4096(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x8a,0x00,0x10,0x00,0x00]
+ vfnmadd132pd 4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -4096(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x4a,0x80]
+ vfnmadd132pd -4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -4128(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x20,0x9c,0x8a,0xe0,0xef,0xff,0xff]
+ vfnmadd132pd -4128(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 1016(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x4a,0x7f]
+ vfnmadd132pd 1016(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 1024(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x8a,0x00,0x04,0x00,0x00]
+ vfnmadd132pd 1024(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -1024(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x4a,0x80]
+ vfnmadd132pd -1024(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -1032(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x62,0xcd,0x30,0x9c,0x8a,0xf8,0xfb,0xff,0xff]
+ vfnmadd132pd -1032(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x22,0x3d,0x00,0xac,0xc4]
+ vfnmadd213ps %xmm20, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x22,0x3d,0x04,0xac,0xc4]
+ vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4}
+
+// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x3d,0x84,0xac,0xc4]
+ vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} {z}
+
+// CHECK: vfnmadd213ps (%rcx), %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x01]
+ vfnmadd213ps (%rcx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 291(%rax,%r14,8), %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x22,0x3d,0x00,0xac,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd213ps 291(%rax,%r14,8), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps (%rcx){1to4}, %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x01]
+ vfnmadd213ps (%rcx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 2032(%rdx), %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x42,0x7f]
+ vfnmadd213ps 2032(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 2048(%rdx), %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x82,0x00,0x08,0x00,0x00]
+ vfnmadd213ps 2048(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -2048(%rdx), %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x42,0x80]
+ vfnmadd213ps -2048(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -2064(%rdx), %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x00,0xac,0x82,0xf0,0xf7,0xff,0xff]
+ vfnmadd213ps -2064(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 508(%rdx){1to4}, %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x42,0x7f]
+ vfnmadd213ps 508(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 512(%rdx){1to4}, %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x82,0x00,0x02,0x00,0x00]
+ vfnmadd213ps 512(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -512(%rdx){1to4}, %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x42,0x80]
+ vfnmadd213ps -512(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -516(%rdx){1to4}, %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x62,0x3d,0x10,0xac,0x82,0xfc,0xfd,0xff,0xff]
+ vfnmadd213ps -516(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x65,0x20,0xac,0xee]
+ vfnmadd213ps %ymm22, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x65,0x22,0xac,0xee]
+ vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2}
+
+// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x65,0xa2,0xac,0xee]
+ vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} {z}
+
+// CHECK: vfnmadd213ps (%rcx), %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0x29]
+ vfnmadd213ps (%rcx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 291(%rax,%r14,8), %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x65,0x20,0xac,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd213ps 291(%rax,%r14,8), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps (%rcx){1to8}, %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0x29]
+ vfnmadd213ps (%rcx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 4064(%rdx), %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0x6a,0x7f]
+ vfnmadd213ps 4064(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 4096(%rdx), %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0xaa,0x00,0x10,0x00,0x00]
+ vfnmadd213ps 4096(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -4096(%rdx), %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0x6a,0x80]
+ vfnmadd213ps -4096(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -4128(%rdx), %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0xac,0xaa,0xe0,0xef,0xff,0xff]
+ vfnmadd213ps -4128(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 508(%rdx){1to8}, %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0x6a,0x7f]
+ vfnmadd213ps 508(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 512(%rdx){1to8}, %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0xaa,0x00,0x02,0x00,0x00]
+ vfnmadd213ps 512(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -512(%rdx){1to8}, %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0x6a,0x80]
+ vfnmadd213ps -512(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -516(%rdx){1to8}, %ymm19, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x65,0x30,0xac,0xaa,0xfc,0xfd,0xff,0xff]
+ vfnmadd213ps -516(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x02,0xb5,0x00,0xac,0xc0]
+ vfnmadd213pd %xmm24, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x02,0xb5,0x04,0xac,0xc0]
+ vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4}
+
+// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0xb5,0x84,0xac,0xc0]
+ vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} {z}
+
+// CHECK: vfnmadd213pd (%rcx), %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x01]
+ vfnmadd213pd (%rcx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 291(%rax,%r14,8), %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x22,0xb5,0x00,0xac,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd213pd 291(%rax,%r14,8), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd (%rcx){1to2}, %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x01]
+ vfnmadd213pd (%rcx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 2032(%rdx), %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x42,0x7f]
+ vfnmadd213pd 2032(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 2048(%rdx), %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x82,0x00,0x08,0x00,0x00]
+ vfnmadd213pd 2048(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -2048(%rdx), %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x42,0x80]
+ vfnmadd213pd -2048(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -2064(%rdx), %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xac,0x82,0xf0,0xf7,0xff,0xff]
+ vfnmadd213pd -2064(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 1016(%rdx){1to2}, %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x42,0x7f]
+ vfnmadd213pd 1016(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 1024(%rdx){1to2}, %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x82,0x00,0x04,0x00,0x00]
+ vfnmadd213pd 1024(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -1024(%rdx){1to2}, %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x42,0x80]
+ vfnmadd213pd -1024(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -1032(%rdx){1to2}, %xmm25, %xmm24
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xac,0x82,0xf8,0xfb,0xff,0xff]
+ vfnmadd213pd -1032(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20
+// CHECK: encoding: [0x62,0x82,0xa5,0x20,0xac,0xe0]
+ vfnmadd213pd %ymm24, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4}
+// CHECK: encoding: [0x62,0x82,0xa5,0x24,0xac,0xe0]
+ vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4}
+
+// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0xa5,0xa4,0xac,0xe0]
+ vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} {z}
+
+// CHECK: vfnmadd213pd (%rcx), %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0x21]
+ vfnmadd213pd (%rcx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 291(%rax,%r14,8), %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xa2,0xa5,0x20,0xac,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd213pd 291(%rax,%r14,8), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd (%rcx){1to4}, %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0x21]
+ vfnmadd213pd (%rcx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 4064(%rdx), %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0x62,0x7f]
+ vfnmadd213pd 4064(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 4096(%rdx), %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0xa2,0x00,0x10,0x00,0x00]
+ vfnmadd213pd 4096(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -4096(%rdx), %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0x62,0x80]
+ vfnmadd213pd -4096(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -4128(%rdx), %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x20,0xac,0xa2,0xe0,0xef,0xff,0xff]
+ vfnmadd213pd -4128(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 1016(%rdx){1to4}, %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0x62,0x7f]
+ vfnmadd213pd 1016(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 1024(%rdx){1to4}, %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0xa2,0x00,0x04,0x00,0x00]
+ vfnmadd213pd 1024(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -1024(%rdx){1to4}, %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0x62,0x80]
+ vfnmadd213pd -1024(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -1032(%rdx){1to4}, %ymm27, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xa5,0x30,0xac,0xa2,0xf8,0xfb,0xff,0xff]
+ vfnmadd213pd -1032(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18
+// CHECK: encoding: [0x62,0x82,0x2d,0x00,0xbc,0xd0]
+ vfnmadd231ps %xmm24, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1}
+// CHECK: encoding: [0x62,0x82,0x2d,0x01,0xbc,0xd0]
+ vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1}
+
+// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x2d,0x81,0xbc,0xd0]
+ vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} {z}
+
+// CHECK: vfnmadd231ps (%rcx), %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x11]
+ vfnmadd231ps (%rcx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 291(%rax,%r14,8), %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xa2,0x2d,0x00,0xbc,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd231ps 291(%rax,%r14,8), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps (%rcx){1to4}, %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x11]
+ vfnmadd231ps (%rcx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 2032(%rdx), %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x52,0x7f]
+ vfnmadd231ps 2032(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 2048(%rdx), %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x92,0x00,0x08,0x00,0x00]
+ vfnmadd231ps 2048(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -2048(%rdx), %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x52,0x80]
+ vfnmadd231ps -2048(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -2064(%rdx), %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x92,0xf0,0xf7,0xff,0xff]
+ vfnmadd231ps -2064(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 508(%rdx){1to4}, %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x52,0x7f]
+ vfnmadd231ps 508(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 512(%rdx){1to4}, %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x92,0x00,0x02,0x00,0x00]
+ vfnmadd231ps 512(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -512(%rdx){1to4}, %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x52,0x80]
+ vfnmadd231ps -512(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -516(%rdx){1to4}, %xmm26, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x92,0xfc,0xfd,0xff,0xff]
+ vfnmadd231ps -516(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xa2,0x5d,0x20,0xbc,0xe5]
+ vfnmadd231ps %ymm21, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x5d,0x24,0xbc,0xe5]
+ vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4}
+
+// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x5d,0xa4,0xbc,0xe5]
+ vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} {z}
+
+// CHECK: vfnmadd231ps (%rcx), %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x21]
+ vfnmadd231ps (%rcx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 291(%rax,%r14,8), %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xa2,0x5d,0x20,0xbc,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd231ps 291(%rax,%r14,8), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps (%rcx){1to8}, %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x21]
+ vfnmadd231ps (%rcx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 4064(%rdx), %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x62,0x7f]
+ vfnmadd231ps 4064(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 4096(%rdx), %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0xa2,0x00,0x10,0x00,0x00]
+ vfnmadd231ps 4096(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -4096(%rdx), %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x62,0x80]
+ vfnmadd231ps -4096(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -4128(%rdx), %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x20,0xbc,0xa2,0xe0,0xef,0xff,0xff]
+ vfnmadd231ps -4128(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 508(%rdx){1to8}, %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x62,0x7f]
+ vfnmadd231ps 508(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 512(%rdx){1to8}, %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0xa2,0x00,0x02,0x00,0x00]
+ vfnmadd231ps 512(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -512(%rdx){1to8}, %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x62,0x80]
+ vfnmadd231ps -512(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -516(%rdx){1to8}, %ymm20, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x5d,0x30,0xbc,0xa2,0xfc,0xfd,0xff,0xff]
+ vfnmadd231ps -516(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x02,0xb5,0x00,0xbc,0xea]
+ vfnmadd231pd %xmm26, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3}
+// CHECK: encoding: [0x62,0x02,0xb5,0x03,0xbc,0xea]
+ vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3}
+
+// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0xb5,0x83,0xbc,0xea]
+ vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} {z}
+
+// CHECK: vfnmadd231pd (%rcx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0x29]
+ vfnmadd231pd (%rcx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 291(%rax,%r14,8), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x22,0xb5,0x00,0xbc,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd231pd 291(%rax,%r14,8), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd (%rcx){1to2}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0x29]
+ vfnmadd231pd (%rcx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 2032(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0x6a,0x7f]
+ vfnmadd231pd 2032(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 2048(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0xaa,0x00,0x08,0x00,0x00]
+ vfnmadd231pd 2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -2048(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0x6a,0x80]
+ vfnmadd231pd -2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -2064(%rdx), %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x00,0xbc,0xaa,0xf0,0xf7,0xff,0xff]
+ vfnmadd231pd -2064(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 1016(%rdx){1to2}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0x6a,0x7f]
+ vfnmadd231pd 1016(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 1024(%rdx){1to2}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0xaa,0x00,0x04,0x00,0x00]
+ vfnmadd231pd 1024(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -1024(%rdx){1to2}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0x6a,0x80]
+ vfnmadd231pd -1024(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -1032(%rdx){1to2}, %xmm25, %xmm29
+// CHECK: encoding: [0x62,0x62,0xb5,0x10,0xbc,0xaa,0xf8,0xfb,0xff,0xff]
+ vfnmadd231pd -1032(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x95,0x20,0xbc,0xf7]
+ vfnmadd231pd %ymm23, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x95,0x21,0xbc,0xf7]
+ vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1}
+
+// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x95,0xa1,0xbc,0xf7]
+ vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} {z}
+
+// CHECK: vfnmadd231pd (%rcx), %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0x31]
+ vfnmadd231pd (%rcx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 291(%rax,%r14,8), %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x95,0x20,0xbc,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmadd231pd 291(%rax,%r14,8), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd (%rcx){1to4}, %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0x31]
+ vfnmadd231pd (%rcx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 4064(%rdx), %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0x72,0x7f]
+ vfnmadd231pd 4064(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 4096(%rdx), %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0xb2,0x00,0x10,0x00,0x00]
+ vfnmadd231pd 4096(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -4096(%rdx), %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0x72,0x80]
+ vfnmadd231pd -4096(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -4128(%rdx), %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x20,0xbc,0xb2,0xe0,0xef,0xff,0xff]
+ vfnmadd231pd -4128(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 1016(%rdx){1to4}, %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0x72,0x7f]
+ vfnmadd231pd 1016(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 1024(%rdx){1to4}, %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0xb2,0x00,0x04,0x00,0x00]
+ vfnmadd231pd 1024(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -1024(%rdx){1to4}, %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0x72,0x80]
+ vfnmadd231pd -1024(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -1032(%rdx){1to4}, %ymm29, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x95,0x30,0xbc,0xb2,0xf8,0xfb,0xff,0xff]
+ vfnmadd231pd -1032(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0x82,0x35,0x00,0x9e,0xea]
+ vfnmsub132ps %xmm26, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3}
+// CHECK: encoding: [0x62,0x82,0x35,0x03,0x9e,0xea]
+ vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3}
+
+// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0x35,0x83,0x9e,0xea]
+ vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} {z}
+
+// CHECK: vfnmsub132ps (%rcx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0x29]
+ vfnmsub132ps (%rcx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 291(%rax,%r14,8), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x35,0x00,0x9e,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub132ps 291(%rax,%r14,8), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps (%rcx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0x29]
+ vfnmsub132ps (%rcx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 2032(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0x6a,0x7f]
+ vfnmsub132ps 2032(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 2048(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0xaa,0x00,0x08,0x00,0x00]
+ vfnmsub132ps 2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -2048(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0x6a,0x80]
+ vfnmsub132ps -2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -2064(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x00,0x9e,0xaa,0xf0,0xf7,0xff,0xff]
+ vfnmsub132ps -2064(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 508(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0x6a,0x7f]
+ vfnmsub132ps 508(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0xaa,0x00,0x02,0x00,0x00]
+ vfnmsub132ps 512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0x6a,0x80]
+ vfnmsub132ps -512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -516(%rdx){1to4}, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x35,0x10,0x9e,0xaa,0xfc,0xfd,0xff,0xff]
+ vfnmsub132ps -516(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0x9e,0xd6]
+ vfnmsub132ps %ymm22, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x3d,0x25,0x9e,0xd6]
+ vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5}
+
+// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x3d,0xa5,0x9e,0xd6]
+ vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} {z}
+
+// CHECK: vfnmsub132ps (%rcx), %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x11]
+ vfnmsub132ps (%rcx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 291(%rax,%r14,8), %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xa2,0x3d,0x20,0x9e,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub132ps 291(%rax,%r14,8), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps (%rcx){1to8}, %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x11]
+ vfnmsub132ps (%rcx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 4064(%rdx), %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x52,0x7f]
+ vfnmsub132ps 4064(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 4096(%rdx), %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x92,0x00,0x10,0x00,0x00]
+ vfnmsub132ps 4096(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -4096(%rdx), %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x52,0x80]
+ vfnmsub132ps -4096(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -4128(%rdx), %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x92,0xe0,0xef,0xff,0xff]
+ vfnmsub132ps -4128(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 508(%rdx){1to8}, %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x52,0x7f]
+ vfnmsub132ps 508(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 512(%rdx){1to8}, %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x92,0x00,0x02,0x00,0x00]
+ vfnmsub132ps 512(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -512(%rdx){1to8}, %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x52,0x80]
+ vfnmsub132ps -512(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -516(%rdx){1to8}, %ymm24, %ymm18
+// CHECK: encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x92,0xfc,0xfd,0xff,0xff]
+ vfnmsub132ps -516(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xa2,0xb5,0x00,0x9e,0xd9]
+ vfnmsub132pd %xmm17, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4}
+// CHECK: encoding: [0x62,0xa2,0xb5,0x04,0x9e,0xd9]
+ vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4}
+
+// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0xb5,0x84,0x9e,0xd9]
+ vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} {z}
+
+// CHECK: vfnmsub132pd (%rcx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x19]
+ vfnmsub132pd (%rcx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 291(%rax,%r14,8), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xa2,0xb5,0x00,0x9e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub132pd 291(%rax,%r14,8), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd (%rcx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x19]
+ vfnmsub132pd (%rcx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 2032(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x5a,0x7f]
+ vfnmsub132pd 2032(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 2048(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x9a,0x00,0x08,0x00,0x00]
+ vfnmsub132pd 2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -2048(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x5a,0x80]
+ vfnmsub132pd -2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -2064(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x9a,0xf0,0xf7,0xff,0xff]
+ vfnmsub132pd -2064(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 1016(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x5a,0x7f]
+ vfnmsub132pd 1016(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x9a,0x00,0x04,0x00,0x00]
+ vfnmsub132pd 1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x5a,0x80]
+ vfnmsub132pd -1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -1032(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x9a,0xf8,0xfb,0xff,0xff]
+ vfnmsub132pd -1032(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xa2,0x9d,0x20,0x9e,0xce]
+ vfnmsub132pd %ymm22, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x9d,0x25,0x9e,0xce]
+ vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5}
+
+// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x9d,0xa5,0x9e,0xce]
+ vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} {z}
+
+// CHECK: vfnmsub132pd (%rcx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x09]
+ vfnmsub132pd (%rcx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 291(%rax,%r14,8), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xa2,0x9d,0x20,0x9e,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub132pd 291(%rax,%r14,8), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd (%rcx){1to4}, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x09]
+ vfnmsub132pd (%rcx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 4064(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x4a,0x7f]
+ vfnmsub132pd 4064(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 4096(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x8a,0x00,0x10,0x00,0x00]
+ vfnmsub132pd 4096(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -4096(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x4a,0x80]
+ vfnmsub132pd -4096(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -4128(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x8a,0xe0,0xef,0xff,0xff]
+ vfnmsub132pd -4128(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x4a,0x7f]
+ vfnmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x8a,0x00,0x04,0x00,0x00]
+ vfnmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x4a,0x80]
+ vfnmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x8a,0xf8,0xfb,0xff,0xff]
+ vfnmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x22,0x1d,0x00,0xae,0xe2]
+ vfnmsub213ps %xmm18, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x22,0x1d,0x04,0xae,0xe2]
+ vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4}
+
+// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x1d,0x84,0xae,0xe2]
+ vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} {z}
+
+// CHECK: vfnmsub213ps (%rcx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0x21]
+ vfnmsub213ps (%rcx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 291(%rax,%r14,8), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x22,0x1d,0x00,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub213ps 291(%rax,%r14,8), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps (%rcx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0x21]
+ vfnmsub213ps (%rcx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 2032(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0x62,0x7f]
+ vfnmsub213ps 2032(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 2048(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0xa2,0x00,0x08,0x00,0x00]
+ vfnmsub213ps 2048(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -2048(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0x62,0x80]
+ vfnmsub213ps -2048(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -2064(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0xae,0xa2,0xf0,0xf7,0xff,0xff]
+ vfnmsub213ps -2064(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 508(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0x62,0x7f]
+ vfnmsub213ps 508(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 512(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0xa2,0x00,0x02,0x00,0x00]
+ vfnmsub213ps 512(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -512(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0x62,0x80]
+ vfnmsub213ps -512(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -516(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0xae,0xa2,0xfc,0xfd,0xff,0xff]
+ vfnmsub213ps -516(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xa2,0x35,0x20,0xae,0xe7]
+ vfnmsub213ps %ymm23, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x35,0x21,0xae,0xe7]
+ vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1}
+
+// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x35,0xa1,0xae,0xe7]
+ vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} {z}
+
+// CHECK: vfnmsub213ps (%rcx), %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0x21]
+ vfnmsub213ps (%rcx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 291(%rax,%r14,8), %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xa2,0x35,0x20,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub213ps 291(%rax,%r14,8), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps (%rcx){1to8}, %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0x21]
+ vfnmsub213ps (%rcx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 4064(%rdx), %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0x62,0x7f]
+ vfnmsub213ps 4064(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 4096(%rdx), %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0xa2,0x00,0x10,0x00,0x00]
+ vfnmsub213ps 4096(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -4096(%rdx), %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0x62,0x80]
+ vfnmsub213ps -4096(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -4128(%rdx), %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x20,0xae,0xa2,0xe0,0xef,0xff,0xff]
+ vfnmsub213ps -4128(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 508(%rdx){1to8}, %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0x62,0x7f]
+ vfnmsub213ps 508(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 512(%rdx){1to8}, %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0xa2,0x00,0x02,0x00,0x00]
+ vfnmsub213ps 512(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -512(%rdx){1to8}, %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0x62,0x80]
+ vfnmsub213ps -512(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -516(%rdx){1to8}, %ymm25, %ymm20
+// CHECK: encoding: [0x62,0xe2,0x35,0x30,0xae,0xa2,0xfc,0xfd,0xff,0xff]
+ vfnmsub213ps -516(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20
+// CHECK: encoding: [0x62,0x82,0xf5,0x00,0xae,0xe1]
+ vfnmsub213pd %xmm25, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1}
+// CHECK: encoding: [0x62,0x82,0xf5,0x01,0xae,0xe1]
+ vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1}
+
+// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0xf5,0x81,0xae,0xe1]
+ vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} {z}
+
+// CHECK: vfnmsub213pd (%rcx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0x21]
+ vfnmsub213pd (%rcx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 291(%rax,%r14,8), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xa2,0xf5,0x00,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub213pd 291(%rax,%r14,8), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd (%rcx){1to2}, %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0x21]
+ vfnmsub213pd (%rcx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 2032(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0x62,0x7f]
+ vfnmsub213pd 2032(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 2048(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0xa2,0x00,0x08,0x00,0x00]
+ vfnmsub213pd 2048(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -2048(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0x62,0x80]
+ vfnmsub213pd -2048(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -2064(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x00,0xae,0xa2,0xf0,0xf7,0xff,0xff]
+ vfnmsub213pd -2064(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 1016(%rdx){1to2}, %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0x62,0x7f]
+ vfnmsub213pd 1016(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 1024(%rdx){1to2}, %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0xa2,0x00,0x04,0x00,0x00]
+ vfnmsub213pd 1024(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -1024(%rdx){1to2}, %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0x62,0x80]
+ vfnmsub213pd -1024(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -1032(%rdx){1to2}, %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe2,0xf5,0x10,0xae,0xa2,0xf8,0xfb,0xff,0xff]
+ vfnmsub213pd -1032(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0x82,0xdd,0x20,0xae,0xdc]
+ vfnmsub213pd %ymm28, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7}
+// CHECK: encoding: [0x62,0x82,0xdd,0x27,0xae,0xdc]
+ vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7}
+
+// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xdd,0xa7,0xae,0xdc]
+ vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} {z}
+
+// CHECK: vfnmsub213pd (%rcx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x19]
+ vfnmsub213pd (%rcx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 291(%rax,%r14,8), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xa2,0xdd,0x20,0xae,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub213pd 291(%rax,%r14,8), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd (%rcx){1to4}, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x19]
+ vfnmsub213pd (%rcx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 4064(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x5a,0x7f]
+ vfnmsub213pd 4064(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 4096(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x9a,0x00,0x10,0x00,0x00]
+ vfnmsub213pd 4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -4096(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x5a,0x80]
+ vfnmsub213pd -4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -4128(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x20,0xae,0x9a,0xe0,0xef,0xff,0xff]
+ vfnmsub213pd -4128(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 1016(%rdx){1to4}, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x5a,0x7f]
+ vfnmsub213pd 1016(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 1024(%rdx){1to4}, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x9a,0x00,0x04,0x00,0x00]
+ vfnmsub213pd 1024(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -1024(%rdx){1to4}, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x5a,0x80]
+ vfnmsub213pd -1024(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -1032(%rdx){1to4}, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xdd,0x30,0xae,0x9a,0xf8,0xfb,0xff,0xff]
+ vfnmsub213pd -1032(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18
+// CHECK: encoding: [0x62,0x82,0x25,0x00,0xbe,0xd2]
+ vfnmsub231ps %xmm26, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2}
+// CHECK: encoding: [0x62,0x82,0x25,0x02,0xbe,0xd2]
+ vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2}
+
+// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x25,0x82,0xbe,0xd2]
+ vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} {z}
+
+// CHECK: vfnmsub231ps (%rcx), %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x11]
+ vfnmsub231ps (%rcx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 291(%rax,%r14,8), %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xa2,0x25,0x00,0xbe,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub231ps 291(%rax,%r14,8), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps (%rcx){1to4}, %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x11]
+ vfnmsub231ps (%rcx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 2032(%rdx), %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x52,0x7f]
+ vfnmsub231ps 2032(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 2048(%rdx), %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x92,0x00,0x08,0x00,0x00]
+ vfnmsub231ps 2048(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -2048(%rdx), %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x52,0x80]
+ vfnmsub231ps -2048(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -2064(%rdx), %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x00,0xbe,0x92,0xf0,0xf7,0xff,0xff]
+ vfnmsub231ps -2064(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 508(%rdx){1to4}, %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x52,0x7f]
+ vfnmsub231ps 508(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 512(%rdx){1to4}, %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x92,0x00,0x02,0x00,0x00]
+ vfnmsub231ps 512(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -512(%rdx){1to4}, %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x52,0x80]
+ vfnmsub231ps -512(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -516(%rdx){1to4}, %xmm27, %xmm18
+// CHECK: encoding: [0x62,0xe2,0x25,0x10,0xbe,0x92,0xfc,0xfd,0xff,0xff]
+ vfnmsub231ps -516(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x22,0x5d,0x20,0xbe,0xf2]
+ vfnmsub231ps %ymm18, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1}
+// CHECK: encoding: [0x62,0x22,0x5d,0x21,0xbe,0xf2]
+ vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1}
+
+// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x22,0x5d,0xa1,0xbe,0xf2]
+ vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} {z}
+
+// CHECK: vfnmsub231ps (%rcx), %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0x31]
+ vfnmsub231ps (%rcx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 291(%rax,%r14,8), %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x22,0x5d,0x20,0xbe,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub231ps 291(%rax,%r14,8), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps (%rcx){1to8}, %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0x31]
+ vfnmsub231ps (%rcx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 4064(%rdx), %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0x72,0x7f]
+ vfnmsub231ps 4064(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 4096(%rdx), %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0xb2,0x00,0x10,0x00,0x00]
+ vfnmsub231ps 4096(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -4096(%rdx), %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0x72,0x80]
+ vfnmsub231ps -4096(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -4128(%rdx), %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x20,0xbe,0xb2,0xe0,0xef,0xff,0xff]
+ vfnmsub231ps -4128(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 508(%rdx){1to8}, %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0x72,0x7f]
+ vfnmsub231ps 508(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 512(%rdx){1to8}, %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0xb2,0x00,0x02,0x00,0x00]
+ vfnmsub231ps 512(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -512(%rdx){1to8}, %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0x72,0x80]
+ vfnmsub231ps -512(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -516(%rdx){1to8}, %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x62,0x5d,0x30,0xbe,0xb2,0xfc,0xfd,0xff,0xff]
+ vfnmsub231ps -516(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0x82,0xe5,0x00,0xbe,0xf9]
+ vfnmsub231pd %xmm25, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3}
+// CHECK: encoding: [0x62,0x82,0xe5,0x03,0xbe,0xf9]
+ vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3}
+
+// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0xe5,0x83,0xbe,0xf9]
+ vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} {z}
+
+// CHECK: vfnmsub231pd (%rcx), %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x39]
+ vfnmsub231pd (%rcx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 291(%rax,%r14,8), %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xa2,0xe5,0x00,0xbe,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub231pd 291(%rax,%r14,8), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd (%rcx){1to2}, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x39]
+ vfnmsub231pd (%rcx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 2032(%rdx), %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x7a,0x7f]
+ vfnmsub231pd 2032(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 2048(%rdx), %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0xba,0x00,0x08,0x00,0x00]
+ vfnmsub231pd 2048(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -2048(%rdx), %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x7a,0x80]
+ vfnmsub231pd -2048(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -2064(%rdx), %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x00,0xbe,0xba,0xf0,0xf7,0xff,0xff]
+ vfnmsub231pd -2064(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 1016(%rdx){1to2}, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x7a,0x7f]
+ vfnmsub231pd 1016(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 1024(%rdx){1to2}, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0xba,0x00,0x04,0x00,0x00]
+ vfnmsub231pd 1024(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -1024(%rdx){1to2}, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x7a,0x80]
+ vfnmsub231pd -1024(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -1032(%rdx){1to2}, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xe2,0xe5,0x10,0xbe,0xba,0xf8,0xfb,0xff,0xff]
+ vfnmsub231pd -1032(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xbe,0xf4]
+ vfnmsub231pd %ymm20, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xed,0x21,0xbe,0xf4]
+ vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1}
+
+// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xed,0xa1,0xbe,0xf4]
+ vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} {z}
+
+// CHECK: vfnmsub231pd (%rcx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0x31]
+ vfnmsub231pd (%rcx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 291(%rax,%r14,8), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xa2,0xed,0x20,0xbe,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vfnmsub231pd 291(%rax,%r14,8), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd (%rcx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0x31]
+ vfnmsub231pd (%rcx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 4064(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0x72,0x7f]
+ vfnmsub231pd 4064(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 4096(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0xb2,0x00,0x10,0x00,0x00]
+ vfnmsub231pd 4096(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -4096(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0x72,0x80]
+ vfnmsub231pd -4096(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -4128(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0xbe,0xb2,0xe0,0xef,0xff,0xff]
+ vfnmsub231pd -4128(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0x72,0x7f]
+ vfnmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0xb2,0x00,0x04,0x00,0x00]
+ vfnmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0x72,0x80]
+ vfnmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0xbe,0xb2,0xf8,0xfb,0xff,0xff]
+ vfnmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm22
+
// CHECK: vpermi2d %xmm25, %xmm23, %xmm21
// CHECK: encoding: [0x62,0x82,0x45,0x00,0x76,0xe9]
vpermi2d %xmm25, %xmm23, %xmm21