// Scalar double-precision variants: instantiates fma3s_forms with the "sd"
// mnemonic suffix, the IntF64 intrinsic, and the FR64 / f64 register, value
// and memory-operand classes. VEX_W is applied to every resulting record.
defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
+// These patterns use the 123 ordering, instead of 213, even though
+// they match the intrinsic to the 213 version of the instruction.
+// This is because src1 is tied to dest, and the scalar intrinsics
+// require the pass-through values to come from the first source
+// operand, not the second.
// Selects the single-precision scalar intrinsic (IntF32) on whole VR128
// values: each operand is copied into FR32, passed to the NAME#"SSr213r"
// instruction in $src1, $src2, $src3 order, and the result is copied back
// to VR128.
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"SSr213r")
- (COPY_TO_REGCLASS $src2, FR32),
(COPY_TO_REGCLASS $src1, FR32),
+ (COPY_TO_REGCLASS $src2, FR32),
(COPY_TO_REGCLASS $src3, FR32)),
VR128)>;
// Selects the double-precision scalar intrinsic (IntF64) on whole VR128
// values: each operand is copied into FR64, passed to the NAME#"SDr213r"
// instruction in $src1, $src2, $src3 order, and the result is copied back
// to VR128.
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"SDr213r")
- (COPY_TO_REGCLASS $src2, FR64),
(COPY_TO_REGCLASS $src1, FR64),
+ (COPY_TO_REGCLASS $src2, FR64),
(COPY_TO_REGCLASS $src3, FR64)),
VR128)>;
}
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fmadd213ss (%r8), %xmm
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmadd213ss (%r8), [[XMM0]], [[XMM1]]
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fnmadd213ss (%r8), %xmm
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmadd213ss (%r8), [[XMM0]], [[XMM1]]
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fmsub213ss
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmsub213ss (%r8), [[XMM0]], [[XMM1]]
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fnmsub213ss
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmsub213ss (%r8), [[XMM0]], [[XMM1]]
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
;;;;
define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fmadd213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmadd213sd (%r8), [[XMM0]], [[XMM1]]
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fnmadd213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmadd213sd (%r8), [[XMM0]], [[XMM1]]
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}
define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fmsub213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmsub213sd (%r8), [[XMM0]], [[XMM1]]
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; On the win32 x86_64 triple the vector arguments arrive by reference: %a0 at
; (%rcx), %a1 at (%rdx) and %a2 at (%r8). The scalar intrinsic takes its
; pass-through elements from %a0, so the register loaded from (%rcx) must be
; the destination (the last operand in AT&T syntax) and the register loaded
; from (%rdx) must be the middle operand.
- ; CHECK: fnmsub213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmsub213sd (%r8), [[XMM0]], [[XMM1]]
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}