From df4b35e3dd85fead444e23b477d61dfd43e1fb6f Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 27 Sep 2011 23:50:46 +0000 Subject: [PATCH] Remove X86-dependent stuff from SSEDomainFix. This also enables domain swizzling for AVX code which required a few trivial test changes. The pass will be moved to lib/CodeGen shortly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140659 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/Passes.h | 9 ++++++ lib/Target/X86/SSEDomainFix.cpp | 46 +++++++++++++++++++---------- lib/Target/X86/X86.h | 4 --- lib/Target/X86/X86TargetMachine.cpp | 2 +- test/CodeGen/X86/avx-load-store.ll | 4 +-- test/CodeGen/X86/avx-splat.ll | 2 +- 6 files changed, 43 insertions(+), 24 deletions(-) diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 82b384461a1..7a03ce905d8 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -24,6 +24,7 @@ namespace llvm { class MachineFunctionPass; class PassInfo; class TargetLowering; + class TargetRegisterClass; class raw_ostream; /// createUnreachableBlockEliminationPass - The LLVM code generator does not @@ -225,6 +226,14 @@ namespace llvm { /// FunctionPass *createExpandISelPseudosPass(); + /// createExecutionDependencyFixPass - This pass fixes execution time + /// problems with dependent instructions, such as switching execution + /// domains to match. + /// + /// The pass will examine instructions using and defining registers in RC. 
+ /// + FunctionPass *createExecutionDependencyFixPass(const TargetRegisterClass *RC); + } // End llvm namespace #endif diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 752099846f7..8d8f5d452df 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -18,10 +18,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sse-domain-fix" -#include "X86InstrInfo.h" +#define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" @@ -97,25 +99,27 @@ struct DomainValue { }; } -static const unsigned NumRegs = 16; - namespace { class SSEDomainFixPass : public MachineFunctionPass { static char ID; SpecificBumpPtrAllocator<DomainValue> Allocator; SmallVector<DomainValue*,16> Avail; + const TargetRegisterClass *const RC; MachineFunction *MF; - const X86InstrInfo *TII; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineBasicBlock *MBB; + std::vector<int> AliasMap; + const unsigned NumRegs; DomainValue **LiveRegs; typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; LiveOutMap LiveOuts; unsigned Distance; public: - SSEDomainFixPass() : MachineFunctionPass(ID) {} + SSEDomainFixPass(const TargetRegisterClass *rc) + : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -154,10 +158,9 @@ char SSEDomainFixPass::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. -int SSEDomainFixPass::RegIndex(unsigned reg) { - assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); - reg -= X86::XMM0; - return reg < NumRegs ? 
(int) reg : -1; +int SSEDomainFixPass::RegIndex(unsigned Reg) { + assert(Reg < AliasMap.size() && "Invalid register"); + return AliasMap[Reg]; } DomainValue *SSEDomainFixPass::Alloc(int domain) { @@ -444,23 +447,33 @@ void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { MF = &mf; - TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo()); + TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); MBB = 0; LiveRegs = 0; Distance = 0; - assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); + assert(NumRegs == RC->getNumRegs() && "Bad regclass"); // If no XMM registers are used in the function, we can skip it completely. bool anyregs = false; - for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), - E = X86::VR128RegClass.end(); I != E; ++I) + for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); + I != E; ++I) if (MF->getRegInfo().isPhysRegUsed(*I)) { anyregs = true; break; } if (!anyregs) return false; + // Initialize the AliasMap on the first use. + if (AliasMap.empty()) { + // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, + // or -1. 
+ AliasMap.resize(TRI->getNumRegs(), -1); + for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) + for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) + AliasMap[*AI] = i; + } + MachineBasicBlock *Entry = MF->begin(); SmallPtrSet<MachineBasicBlock*, 16> Visited; for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > @@ -501,6 +514,7 @@ bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { return false; } -FunctionPass *llvm::createSSEDomainFixPass() { - return new SSEDomainFixPass(); +FunctionPass * +llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { + return new SSEDomainFixPass(RC); } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index d480d0c8654..81e94227fca 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -44,10 +44,6 @@ FunctionPass* createGlobalBaseRegPass(); /// FunctionPass *createX86FloatingPointStackifierPass(); -/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain -/// crossings. -FunctionPass *createSSEDomainFixPass(); - /// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions /// before each call to avoid transition penalty between functions encoded with /// AVX and SSE. 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 6a35ecc605a..683d6aa0dd5 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -133,7 +133,7 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, bool ShouldPrint = false; if (OptLevel != CodeGenOpt::None && (Subtarget.hasSSE2() || Subtarget.hasAVX())) { - PM.add(createSSEDomainFixPass()); + PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); ShouldPrint = true; } diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index 1fda9bc2292..07a63efd71f 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -3,8 +3,8 @@ ; CHECK: vmovaps ; CHECK: vmovaps -; CHECK: vmovapd -; CHECK: vmovapd +; CHECK: vmovaps +; CHECK: vmovaps ; CHECK: vmovaps ; CHECK: vmovaps define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp { diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index f8522c26951..af20b90322e 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -47,7 +47,7 @@ entry: ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> ; To: ; shuffle (vload ptr)), undef, <1, 1, 1, 1> -; CHECK: vmovaps +; CHECK: vmovdqa ; CHECK-NEXT: vinsertf128 $1 ; CHECK-NEXT: vpermilps $-1 define <8 x float> @funcE() nounwind { -- 2.34.1