multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
SDNode OpNode> {
+ let ExeDomain = SSEPackedSingle in
defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", OpNode,
FR32, f32mem>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", VR128, ssmem>;
+
+ let ExeDomain = SSEPackedDouble in
defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", OpNode,
FR64, f64mem>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", VR128, sdmem>,
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ss:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
; CHECK-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
; CHECK-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ss:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
; CHECK-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
; CHECK-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ss:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
; CHECK-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
; CHECK-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ss:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
-; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
; CHECK-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_sd:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
; CHECK-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
; CHECK-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
-; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
-; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
-; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-NEXT: # BB#0:
;
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
-; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
-; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-LABEL: fmadd_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
; CHECK-LABEL: fmadd_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
; CHECK-LABEL: fmsub_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
; CHECK-LABEL: fmsub_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
; CHECK-LABEL: fnmadd_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
; CHECK-LABEL: fnmadd_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
; CHECK-LABEL: fnmsub_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
; CHECK-LABEL: fnmsub_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]]
-; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0
define double @test_f64_fneg_fmul(double %x, double %y) #0 {
; FMA-LABEL: test_f64_fneg_fmul:
; FMA: # BB#0:
-; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;