From e2ee98ab169fe8d1d4bd39fe0ecb89274eceb438 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 27 Mar 2014 23:12:31 +0000 Subject: [PATCH] [PowerPC] Use a small cleanup pass to remove VSX self copies As explained in r204976, because of how the allocation of VSX registers interacts with the call-lowering code, we sometimes end up generating self VSX copies. Specifically, things like this: %VSL2 = COPY %F2, %VSL2 (where %F2 is really a sub-register of %VSL2, and so this copy is a nop) This adds a small cleanup pass to remove these prior to post-RA scheduling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204980 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.h | 1 + lib/Target/PowerPC/PPCInstrInfo.cpp | 74 +++++++++++++++++++++++++ lib/Target/PowerPC/PPCTargetMachine.cpp | 3 + test/CodeGen/PowerPC/vsx-self-copy.ll | 27 +++++++++ 4 files changed, 105 insertions(+) create mode 100644 test/CodeGen/PowerPC/vsx-self-copy.ll diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index df8dd8f28ba..c42c5be14be 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -37,6 +37,7 @@ namespace llvm { #endif FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCVSXCopyPass(); + FunctionPass *createPPCVSXCopyCleanupPass(); FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index be4dafa0e30..0f799017916 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1969,6 +1969,80 @@ char PPCVSXCopy::ID = 0; FunctionPass* llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); } +#undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-vsx-copy-cleanup" + +namespace llvm { + void initializePPCVSXCopyCleanupPass(PassRegistry&); +} + +namespace { + // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of 
VSX + // registers (mostly because the ABI code still places all values into the + // "traditional" floating-point and vector registers). Remove them here. + struct PPCVSXCopyCleanup : public MachineFunctionPass { + static char ID; + PPCVSXCopyCleanup() : MachineFunctionPass(ID) { + initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry()); + } + + const PPCTargetMachine *TM; + const PPCInstrInfo *TII; + +protected: + bool processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + SmallVector<MachineInstr *, 4> ToDelete; + for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); + I != IE; ++I) { + MachineInstr *MI = I; + if (MI->getOpcode() == PPC::XXLOR && + MI->getOperand(0).getReg() == MI->getOperand(1).getReg() && + MI->getOperand(0).getReg() == MI->getOperand(2).getReg()) + ToDelete.push_back(MI); + } + + if (!ToDelete.empty()) + Changed = true; + + for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) { + DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]); + ToDelete[i]->eraseFromParent(); + } + + return Changed; + } + +public: + virtual bool runOnMachineFunction(MachineFunction &MF) { + TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); + TII = TM->getInstrInfo(); + + bool Changed = false; + + for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + MachineBasicBlock &B = *I++; + if (processBlock(B)) + Changed = true; + } + + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE, + "PowerPC VSX Copy Cleanup", false, false) + +char PPCVSXCopyCleanup::ID = 0; +FunctionPass* +llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); } + #undef DEBUG_TYPE #define DEBUG_TYPE "ppc-early-ret" STATISTIC(NumBCLR, "Number of early conditional returns"); diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index cb869bd91eb..e7438f394cc 100644 ---
a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -182,6 +182,9 @@ bool PPCPassConfig::addPreRegAlloc() { } bool PPCPassConfig::addPreSched2() { + if (getPPCSubtarget().hasVSX()) + addPass(createPPCVSXCopyCleanupPass()); + if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); diff --git a/test/CodeGen/PowerPC/vsx-self-copy.ll b/test/CodeGen/PowerPC/vsx-self-copy.ll new file mode 100644 index 00000000000..23615ca10c1 --- /dev/null +++ b/test/CodeGen/PowerPC/vsx-self-copy.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @takFP(double %x, double %y, double %z) #0 { +entry: + br i1 undef, label %if.then, label %return + +if.then: ; preds = %if.then, %entry + %x.tr16 = phi double [ %call, %if.then ], [ %x, %entry ] + %call = tail call double @takFP(double undef, double undef, double undef) + %call4 = tail call double @takFP(double undef, double %x.tr16, double undef) + %cmp = fcmp olt double undef, %call + br i1 %cmp, label %if.then, label %return + +return: ; preds = %if.then, %entry + %z.tr.lcssa = phi double [ %z, %entry ], [ %call4, %if.then ] + ret double %z.tr.lcssa + +; CHECK: @takFP +; CHECK-NOT: xxlor 0, 0, 0 +; CHECK: blr +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + -- 2.34.1