From 66baef3d14369f4238275e535d8ce4e5a209caf8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 11 Oct 2015 19:48:12 +0000 Subject: [PATCH] [DAGCombiner] Improved FMA combine support for vectors Enabled constant canonicalization for all constants. Improved combining of constant vectors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249993 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 69 ++--- test/CodeGen/X86/fma_patterns.ll | 336 +++++++++++++---------- 2 files changed, 221 insertions(+), 184 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3280060316c..c1f962f1406 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8331,7 +8331,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // TODO: FMA nodes should have flags that propagate to the created nodes. 
@@ -8339,26 +8340,26 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FMUL && - N0 == N2.getOperand(0) && - N2.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), - &Flags), &Flags); - } - + if (Options.UnsafeFPMath) { + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + &Flags), &Flags); + } - // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (Options.UnsafeFPMath && - N0.getOpcode() == ISD::FMUL && N1CFP && - N0.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), - &Flags), - N2); + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (N0.getOpcode() == ISD::FMUL && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + &Flags), + N2); + } } // (fma x, 1, y) -> (fadd x, y) @@ -8377,20 +8378,22 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } - // (fma x, c, x) -> (fmul x, (c+1)) - if (Options.UnsafeFPMath && N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT), - &Flags), &Flags); - } - // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + if (Options.UnsafeFPMath) { + // (fma x, c, x) -> (fmul x, (c+1)) + 
if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT), - &Flags), &Flags); + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, dl, VT), + &Flags), &Flags); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, dl, VT), + &Flags), &Flags); + } } return SDValue(); diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll index 0696f7c3533..383ecfcd5e3 100644 --- a/test/CodeGen/X86/fma_patterns.ll +++ b/test/CodeGen/X86/fma_patterns.ll @@ -1,16 +1,16 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=CHECK_FMA +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=CHECK_FMA +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=CHECK_FMA4 ; ; Patterns (+ fneg variants): add(mul(x,y),z), sub(mul(x,y),z) ; define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: test_x86_fmadd_ps: -; CHECK: # BB#0: -; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmadd_ps: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmadd_ps: ; CHECK_FMA4: # BB#0: @@ 
-22,10 +22,10 @@ define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo } define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: test_x86_fmsub_ps: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmsub_ps: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmsub_ps: ; CHECK_FMA4: # BB#0: @@ -37,10 +37,10 @@ define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo } define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: test_x86_fnmadd_ps: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fnmadd_ps: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fnmadd_ps: ; CHECK_FMA4: # BB#0: @@ -52,10 +52,10 @@ define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x fl } define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: test_x86_fnmsub_ps: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fnmsub_ps: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fnmsub_ps: ; CHECK_FMA4: # BB#0: @@ -68,10 +68,10 @@ define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x fl } define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { -; CHECK-LABEL: test_x86_fmadd_ps_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmadd_ps_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 +; CHECK_FMA-NEXT: 
retq ; ; CHECK_FMA4-LABEL: test_x86_fmadd_ps_y: ; CHECK_FMA4: # BB#0: @@ -83,10 +83,10 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f } define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { -; CHECK-LABEL: test_x86_fmsub_ps_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmsub_ps_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmsub_ps_y: ; CHECK_FMA4: # BB#0: @@ -98,10 +98,10 @@ define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f } define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { -; CHECK-LABEL: test_x86_fnmadd_ps_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fnmadd_ps_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fnmadd_ps_y: ; CHECK_FMA4: # BB#0: @@ -113,10 +113,10 @@ define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x } define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { -; CHECK-LABEL: test_x86_fnmsub_ps_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fnmsub_ps_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fnmsub_ps_y: ; CHECK_FMA4: # BB#0: @@ -129,10 +129,10 @@ define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x } define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { -; CHECK-LABEL: test_x86_fmadd_pd_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: 
test_x86_fmadd_pd_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmadd_pd_y: ; CHECK_FMA4: # BB#0: @@ -144,10 +144,10 @@ define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 } define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { -; CHECK-LABEL: test_x86_fmsub_pd_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmsub_pd_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmsub_pd_y: ; CHECK_FMA4: # BB#0: @@ -159,10 +159,10 @@ define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 } define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { -; CHECK-LABEL: test_x86_fmsub_pd: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmsub_pd: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmsub_pd: ; CHECK_FMA4: # BB#0: @@ -174,10 +174,10 @@ define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x } define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { -; CHECK-LABEL: test_x86_fnmadd_ss: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fnmadd_ss: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fnmadd_ss: ; CHECK_FMA4: # BB#0: @@ -189,10 +189,10 @@ define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { } define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { -; CHECK-LABEL: test_x86_fnmadd_sd: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 -; 
CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fnmadd_sd: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fnmadd_sd: ; CHECK_FMA4: # BB#0: @@ -204,10 +204,10 @@ define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { } define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { -; CHECK-LABEL: test_x86_fmsub_sd: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmsub_sd: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmsub_sd: ; CHECK_FMA4: # BB#0: @@ -219,10 +219,10 @@ define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { } define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { -; CHECK-LABEL: test_x86_fnmsub_ss: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fnmsub_ss: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fnmsub_ss: ; CHECK_FMA4: # BB#0: @@ -235,11 +235,11 @@ define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { } define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: test_x86_fmadd_ps_load: -; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdi), %xmm2 -; CHECK-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmadd_ps_load: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vmovaps (%rdi), %xmm2 +; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmadd_ps_load: ; CHECK_FMA4: # BB#0: @@ -252,11 +252,11 @@ define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 } define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { -; 
CHECK-LABEL: test_x86_fmsub_ps_load: -; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdi), %xmm2 -; CHECK-NEXT: vfmsub213ps %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_x86_fmsub_ps_load: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vmovaps (%rdi), %xmm2 +; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm2, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_x86_fmsub_ps_load: ; CHECK_FMA4: # BB#0: @@ -273,10 +273,10 @@ define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 ; define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_add_x_one_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_add_x_one_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_one_y: ; CHECK_FMA4: # BB#0: @@ -288,10 +288,10 @@ define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { } define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_y_add_x_one: -; CHECK: # BB#0: -; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_y_add_x_one: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_one: ; CHECK_FMA4: # BB#0: @@ -303,10 +303,10 @@ define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { } define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_add_x_negone_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_add_x_negone_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_negone_y: ; 
CHECK_FMA4: # BB#0: @@ -318,10 +318,10 @@ define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y } define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_y_add_x_negone: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_y_add_x_negone: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_negone: ; CHECK_FMA4: # BB#0: @@ -333,10 +333,10 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y } define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_sub_one_x_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_sub_one_x_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_one_x_y: ; CHECK_FMA4: # BB#0: @@ -348,10 +348,10 @@ define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { } define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_y_sub_one_x: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_one_x: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_one_x: ; CHECK_FMA4: # BB#0: @@ -363,10 +363,10 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { } define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_sub_negone_x_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: 
test_v4f32_mul_sub_negone_x_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_negone_x_y: ; CHECK_FMA4: # BB#0: @@ -378,10 +378,10 @@ define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y } define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_y_sub_negone_x: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_negone_x: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_negone_x: ; CHECK_FMA4: # BB#0: @@ -393,10 +393,10 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y } define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_sub_x_one_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_sub_x_one_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_one_y: ; CHECK_FMA4: # BB#0: @@ -408,10 +408,10 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { } define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_y_sub_x_one: -; CHECK: # BB#0: -; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_x_one: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_one: ; CHECK_FMA4: # BB#0: @@ -423,10 +423,10 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { } define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> 
%y) { -; CHECK-LABEL: test_v4f32_mul_sub_x_negone_y: -; CHECK: # BB#0: -; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_sub_x_negone_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_negone_y: ; CHECK_FMA4: # BB#0: @@ -438,10 +438,10 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y } define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: test_v4f32_mul_y_sub_x_negone: -; CHECK: # BB#0: -; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_x_negone: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_negone: ; CHECK_FMA4: # BB#0: @@ -457,11 +457,11 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y ; define float @test_f32_interp(float %x, float %y, float %t) { -; CHECK-LABEL: test_f32_interp: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 -; CHECK-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_f32_interp: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 +; CHECK_FMA-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_f32_interp: ; CHECK_FMA4: # BB#0: @@ -476,11 +476,11 @@ define float @test_f32_interp(float %x, float %y, float %t) { } define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { -; CHECK-LABEL: test_v4f32_interp: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 -; CHECK-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f32_interp: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 +; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 +; 
CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f32_interp: ; CHECK_FMA4: # BB#0: @@ -495,11 +495,11 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float } define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { -; CHECK-LABEL: test_v8f32_interp: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 -; CHECK-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v8f32_interp: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 +; CHECK_FMA-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v8f32_interp: ; CHECK_FMA4: # BB#0: @@ -514,11 +514,11 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float } define double @test_f64_interp(double %x, double %y, double %t) { -; CHECK-LABEL: test_f64_interp: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 -; CHECK-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_f64_interp: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 +; CHECK_FMA-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_f64_interp: ; CHECK_FMA4: # BB#0: @@ -533,11 +533,11 @@ define double @test_f64_interp(double %x, double %y, double %t) { } define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { -; CHECK-LABEL: test_v2f64_interp: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 -; CHECK-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v2f64_interp: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 +; CHECK_FMA-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v2f64_interp: ; CHECK_FMA4: # BB#0: @@ -552,11 +552,11 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do } define <4 x 
double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { -; CHECK-LABEL: test_v4f64_interp: -; CHECK: # BB#0: -; CHECK-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1 -; CHECK-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 -; CHECK-NEXT: retq +; CHECK_FMA-LABEL: test_v4f64_interp: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1 +; CHECK_FMA-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 +; CHECK_FMA-NEXT: retq ; ; CHECK_FMA4-LABEL: test_v4f64_interp: ; CHECK_FMA4: # BB#0: @@ -569,3 +569,37 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do %r = fadd <4 x double> %tx, %ty ret <4 x double> %r } + +; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + +define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { +; ALL-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: +; ALL: # BB#0: +; ALL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; ALL-NEXT: retq + %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> + %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> + %a = fadd <4 x float> %m0, %m1 + ret <4 x float> %a +} + +; (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + +define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { +; CHECK_FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: +; CHECK_FMA: # BB#0: +; CHECK_FMA-NEXT: vmovaps {{.*#+}} xmm2 = [4.000000e+00,6.000000e+00,6.000000e+00,4.000000e+00] +; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 +; CHECK_FMA-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> + %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> + %a = fadd <4 x float> %m1, %y + ret <4 x float> %a +} + +attributes #0 = { "unsafe-fp-math"="true" } -- 2.34.1