From d0d042436876cee0ef37b1faf8cb50c878549407 Mon Sep 17 00:00:00 2001
From: Alexey Volkov
Date: Fri, 21 Nov 2014 11:19:34 +0000
Subject: [PATCH] [X86] For Silvermont CPU use 16-bit division instead of 64-bit for small positive numbers

Differential Revision: http://reviews.llvm.org/D5938

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222521 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86.td              | 12 ++++++++----
 lib/Target/X86/X86ISelLowering.cpp |  7 ++++---
 lib/Target/X86/X86Subtarget.cpp    |  3 ++-
 lib/Target/X86/X86Subtarget.h      | 13 +++++++++----
 test/CodeGen/X86/slow-div.ll       | 28 ++++++++++++++++++++++++++++
 5 files changed, 51 insertions(+), 12 deletions(-)
 create mode 100644 test/CodeGen/X86/slow-div.ll

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 83f55d32975..9729f4638cb 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -167,9 +167,12 @@ def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
                                    "Support SMAP instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                                     "HasSlowDivide", "true",
-                                     "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                     "HasSlowDivide32", "true",
+                                     "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+                                     "HasSlowDivide64", "true",
+                                     "Use 16-bit divide for positive values less than 65536">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
@@ -234,7 +237,7 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide,
+                      FeatureSlowDivide32, FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;
@@ -244,6 +247,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
                                        FeatureSSE42, FeatureCMPXCHG16B,
                                        FeatureMOVBE, FeaturePOPCNT,
                                        FeaturePCLMUL, FeatureAES,
+                                       FeatureSlowDivide64,
                                        FeatureCallRegIndirect,
                                        FeaturePRFCHW,
                                        FeatureSlowLEA, FeatureSlowIncDec,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f05b6c61ca0..c9006000f26 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -249,9 +249,10 @@ void X86TargetLowering::resetOperationActions() {
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass expensive divides on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
-    addBypassSlowDiv(32, 8);
-    if (Subtarget->is64Bit())
+  if (TM.getOptLevel() >= CodeGenOpt::Default) {
+    if (Subtarget->hasSlowDivide32())
+      addBypassSlowDiv(32, 8);
+    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
       addBypassSlowDiv(64, 16);
   }
 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9d877c99e54..afa0173fb10 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -267,7 +267,8 @@ void X86Subtarget::initializeEnvironment() {
   HasVectorUAMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasSlowDivide = false;
+  HasSlowDivide32 = false;
+  HasSlowDivide64 = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 091b6c4fbbd..cf76ac70e0b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -171,9 +171,13 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
-  /// HasSlowDivide - True if smaller divides are significantly faster than
-  /// full divides and should be used when possible.
-  bool HasSlowDivide;
+  /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+  /// 32-bit divisions and should be used when possible.
+  bool HasSlowDivide32;
+
+  /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+  /// 64-bit divisions and should be used when possible.
+  bool HasSlowDivide64;
 
   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
@@ -373,7 +377,8 @@ public:
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasSlowDivide() const { return HasSlowDivide; }
+  bool hasSlowDivide32() const { return HasSlowDivide32; }
+  bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
diff --git a/test/CodeGen/X86/slow-div.ll b/test/CodeGen/X86/slow-div.ll
new file mode 100644
index 00000000000..52223824bf9
--- /dev/null
+++ b/test/CodeGen/X86/slow-div.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+  %div = sdiv i32 %a, %b
+  ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq $-65536, [[REG]]
+; DIV64: divw
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
+
+
-- 
2.34.1
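
For context, the patch splits the old Atom-only "slow divide" feature in two: the existing 32-to-8-bit bypass and a new 64-to-16-bit bypass, with Silvermont (slm) enabling only the latter. The sketch below is a hypothetical C++ illustration of the guarded fast path that addBypassSlowDiv(64, 16) asks the IR-level bypass to build around a 64-bit divide; it is not LLVM's actual implementation, and the function name is made up. The operands are or'ed together and tested against the bits above bit 15 (the orq/testq $-65536 sequence checked in the test); when neither operand has high bits set, the cheap 16-bit hardware divide (divw) produces the same quotient as the full 64-bit divq.

#include <cstdint>

// Hypothetical stand-in for the fast/slow paths the bypass emits around a
// 64-bit divide on CPUs with FeatureSlowDivide64; illustration only.
uint64_t div64WithBypass(uint64_t a, uint64_t b) {
  // Corresponds to the orq + testq $-65536 pattern: do any bits above
  // bit 15 appear in either operand?
  if (((a | b) & ~uint64_t(0xFFFF)) == 0)
    return uint64_t(uint16_t(a) / uint16_t(b)); // short divw fast path
  return a / b;                                 // full-width divq slow path
}

For sdiv the shortcut only fires when both values are in fact small and non-negative, which is what the commit title means by "small positive numbers"; larger or negative inputs fail the test and take the ordinary full-width divide.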