From: Simon Pilgrim Date: Mon, 30 Nov 2015 22:22:06 +0000 (+0000) Subject: [X86][FMA4] Prefer FMA4 to FMA X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0f73ee481b5c64201c9be66a309d353ee0a677c8;p=oota-llvm.git [X86][FMA4] Prefer FMA4 to FMA We currently output FMA instructions on targets which support both FMA4 + FMA (i.e. later Bulldozer CPUS bdver2/bdver3/bdver4). This patch flips this so FMA4 is preferred; this is for several reasons: 1 - FMA4 is non-destructive reducing the need for mov instructions. 2 - Its more straighforward to commute and fold inputs (although the recent work on FMA has reduced this difference). 3 - All supported targets have FMA4 performance equal or better to FMA - Piledriver (bdver2) in particular has half the throughput when executing FMA instructions. Its looks like no future AMD processor lines will support FMA4 after the Bulldozer series so we're not causing problems for later CPUs. Differential Revision: http://reviews.llvm.org/D14997 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254339 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 425bc2482e9..eb0199aecbe 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -354,9 +354,10 @@ public: bool hasXSAVEC() const { return HasXSAVEC; } bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } - bool hasFMA() const { return HasFMA; } - // FIXME: Favor FMA when both are enabled. Is this the right thing to do? - bool hasFMA4() const { return HasFMA4 && !HasFMA; } + // Prefer FMA4 to FMA - its better for commutation/memory folding and + // has equal or better performance on all supported targets. + bool hasFMA() const { return HasFMA && !HasFMA4; } + bool hasFMA4() const { return HasFMA4; } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } bool hasMOVBE() const { return HasMOVBE; } diff --git a/test/CodeGen/X86/fma-commute-x86.ll b/test/CodeGen/X86/fma-commute-x86.ll index 9e57b00bc0b..9a368792133 100644 --- a/test/CodeGen/X86/fma-commute-x86.ll +++ b/test/CodeGen/X86/fma-commute-x86.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma,+fma4 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s attributes #0 = { nounwind } diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll index 942d799d976..e3295e45823 100644 --- a/test/CodeGen/X86/fma_patterns.ll +++ b/test/CodeGen/X86/fma_patterns.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 diff --git a/test/CodeGen/X86/fma_patterns_wide.ll b/test/CodeGen/X86/fma_patterns_wide.ll index de77a27ad2b..f412c174fe3 100644 --- a/test/CodeGen/X86/fma_patterns_wide.ll +++ b/test/CodeGen/X86/fma_patterns_wide.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=AVX512