From 2676737e5ed3e4b5c89b4d06b60d998e9318eb73 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 24 Mar 2010 22:31:46 +0000 Subject: [PATCH] Make the use of the vmla and vmls VFP instructions controllable via cmd line. Preliminary testing shows significant performance wins by not using these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99436 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 14 ++++++++++++++ lib/Target/ARM/ARMInstrInfo.td | 2 ++ lib/Target/ARM/ARMInstrVFP.td | 8 ++++---- lib/Target/ARM/ARMSubtarget.cpp | 5 +++++ lib/Target/ARM/ARMSubtarget.h | 5 +++++ 5 files changed, 30 insertions(+), 4 deletions(-) diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index f73707fa436..08ead22dc07 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1369,6 +1369,20 @@ class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{4} = op4; } +// Double precision, binary, VML[AS] (for additional predicate) +class ADbI_vmlX opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : VFPAI { + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1011; + let Inst{6} = op6; + let Inst{4} = op4; + list Predicates = [HasVFP2, UseVMLx]; +} + + // Single precision, unary class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 26a280697b9..f2ab06f328f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -140,6 +140,8 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; +def UseVMLx : Predicate<"Subtarget->useVMLx()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index aca82302212..04583892866 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -545,7 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, // FP FMA Operations. // -def VMLAD : ADbI<0b11100, 0b00, 0, 0, +def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), @@ -558,7 +558,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VNMLSD : ADbI<0b11100, 0b01, 0, 0, +def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), @@ -571,7 +571,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VMLSD : ADbI<0b11100, 0b00, 1, 0, +def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), @@ -589,7 +589,7 @@ def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))), def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; -def VNMLAD : ADbI<0b11100, 0b01, 1, 0, +def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 2dad7f11062..6b4438d4c86 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -26,6 +26,10 @@ static cl::opt UseNEONFP("arm-use-neon-fp", cl::desc("Use NEON for single-precision FP"), cl::init(false), cl::Hidden); +static cl::opt +UseVMLxInstructions("arm-use-vmlx", + cl::desc("Use VFP vmla and vmls instructions"), + cl::init(true), cl::Hidden); static cl::opt UseMOVT("arm-use-movt", @@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, : ARMArchVersion(V4) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(UseNEONFP) + , UseVMLx(UseVMLxInstructions) , IsThumb(isT) , ThumbMode(Thumb1) , PostRAScheduler(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 2dc81a4d6d2..686684cab81 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -50,6 +50,10 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// UseVMLx - If the VFP2 instructions are available, indicates whether + /// the VML[AS] instructions should be used. + bool UseVMLx; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -119,6 +123,7 @@ protected: bool hasNEON() const { return ARMFPUType >= NEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } + bool useVMLx() const {return hasVFP2() && UseVMLx; } bool hasFP16() const { return HasFP16; } -- 2.34.1