From 7da9ecf9677b751d81515f95168ae3cb2df54160 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 13 Jan 2010 00:30:23 +0000 Subject: [PATCH] Add a quick pass to optimize sign / zero extension instructions. For targets where the pre-extension values are available in the subreg of the result of the extension, replace the uses of the pre-extension value with the result + extract_subreg. For now, this pass is fairly conservative. It only performs the replacement when both the pre- and post- extension values are used in the block. It will miss cases where the post-extension values are live, but not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93278 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/Passes.h | 4 + include/llvm/Target/TargetInstrInfo.h | 19 ++- lib/CodeGen/LLVMTargetMachine.cpp | 5 + lib/CodeGen/OptimizeExts.cpp | 149 ++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.cpp | 17 ++- lib/Target/X86/X86InstrInfo.h | 16 ++- test/CodeGen/X86/2008-08-05-SpillerBug.ll | 2 +- test/CodeGen/X86/sext-subreg.ll | 17 +++ test/CodeGen/X86/stack-color-with-reg.ll | 2 +- 9 files changed, 203 insertions(+), 28 deletions(-) create mode 100644 lib/CodeGen/OptimizeExts.cpp create mode 100644 test/CodeGen/X86/sext-subreg.ll diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 99f8c34cce8..2203f8c1202 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -170,6 +170,10 @@ namespace llvm { /// instructions. FunctionPass *createMachineSinkingPass(); + /// createOptimizeExtsPass - This pass performs sign / zero extension + /// optimization by increasing uses of extended values. + FunctionPass *createOptimizeExtsPass(); + /// createStackSlotColoringPass - This pass performs stack slot coloring. 
FunctionPass *createStackSlotColoringPass(bool); diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 6172fcfa64c..e6df1bf4596 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -149,16 +149,15 @@ public: return false; } - /// isCoalescableInstr - Return true if the instruction is "coalescable". That - /// is, it's like a copy where it's legal for the source to overlap the - /// destination. e.g. X86::MOVSX64rr32. - virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - isCopy = true; - return true; - } + /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" + /// extension instruction. That is, it's like a copy where it's legal for the + /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns + /// true, then it's expected the pre-extension value is available as a subreg + /// of the result register. This also returns the sub-register index in + /// SubIdx. + virtual bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { return false; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 8757c9f0ae3..84eb71c0496 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -62,6 +62,10 @@ static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); +#if 1 +static cl::opt XX("xx", cl::Hidden); +#endif + // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. 
@@ -324,6 +328,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, /* allowDoubleDefs= */ true); if (OptLevel != CodeGenOpt::None) { + PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); if (!DisableMachineSink) diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp new file mode 100644 index 00000000000..02fc82ec314 --- /dev/null +++ b/lib/CodeGen/OptimizeExts.cpp @@ -0,0 +1,149 @@ +//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ext-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +static cl::opt Aggressive("aggressive-ext-opt", cl::Hidden, + cl::desc("Aggressive extension optimization")); + +STATISTIC(NumReuse, "Number of extension results reused"); + +namespace { + class OptimizeExts : public MachineFunctionPass { + const TargetMachine *TM; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; // Machine dominator tree + + public: + static char ID; // Pass identification + OptimizeExts() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + } + }; +} + +char OptimizeExts::ID = 0; +static RegisterPass 
+X("opt-exts", "Optimize sign / zero extensions"); + +FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); } + +bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) { + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + MRI = &MF.getRegInfo(); + DT = &getAnalysis(); + + bool Changed = false; + + SmallPtrSet LocalMIs; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME; + ++MII) { + MachineInstr *MI = &*MII; + LocalMIs.insert(MI); + + unsigned SrcReg, DstReg, SubIdx; + if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) { + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) + continue; + + MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg); + if (++UI == MRI->use_end()) + // No other uses. + continue; + + // Ok, the source has other uses. See if we can replace the other uses + // with use of the result of the extension. + + SmallPtrSet ReachedBBs; + UI = MRI->use_begin(DstReg); + for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; + ++UI) + ReachedBBs.insert(UI->getParent()); + + bool ExtendLife = true; + SmallVector Uses; + SmallVector ExtendedUses; + + UI = MRI->use_begin(SrcReg); + for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; + ++UI) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + if (UseMI == MI) + continue; + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (UseMBB == MBB) { + // Local uses that come after the extension. + if (!LocalMIs.count(UseMI)) + Uses.push_back(&UseMO); + } else if (ReachedBBs.count(UseMBB)) + // Non-local uses where the result of extension is used. Always + // replace these. 
+ Uses.push_back(&UseMO); + else if (Aggressive && DT->dominates(MBB, UseMBB)) + // We may want to extend live range of the extension result in order + // to replace these uses. + ExtendedUses.push_back(&UseMO); + else { + // Both will be live out of the def MBB anyway. Don't extend live + // range of the extension result. + ExtendLife = false; + break; + } + } + + if (ExtendLife && !ExtendedUses.empty()) + // Ok, we'll extend the liveness of the extension result. + std::copy(ExtendedUses.begin(), ExtendedUses.end(), + std::back_inserter(Uses)); + + // Now replace all uses. + if (!Uses.empty()) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + for (unsigned i = 0, e = Uses.size(); i != e; ++i) { + MachineOperand *UseMO = Uses[i]; + MachineInstr *UseMI = UseMO->getParent(); + MachineBasicBlock *UseMBB = UseMI->getParent(); + unsigned NewVR = MRI->createVirtualRegister(RC); + BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR) + .addReg(DstReg).addImm(SubIdx); + UseMO->setReg(NewVR); + ++NumReuse; + Changed = true; + } + } + } + } + } + + return Changed; +} diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5ef3354f350..a1bacbf0e64 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -713,9 +713,9 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, } bool -X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { +X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { switch (MI.getOpcode()) { default: break; case X86::MOVSX16rr8: @@ -733,10 +733,8 @@ X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy, if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) // Be conservative. 
return false; - isCopy = false; SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); - DstSubIdx = 0; switch (MI.getOpcode()) { default: llvm_unreachable(0); @@ -747,22 +745,23 @@ X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy, case X86::MOVZX32rr8: case X86::MOVSX64rr8: case X86::MOVZX64rr8: - SrcSubIdx = 1; + SubIdx = 1; break; case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: case X86::MOVZX64rr16: - SrcSubIdx = 3; + SubIdx = 3; break; case X86::MOVSX64rr32: case X86::MOVZX64rr32: - SrcSubIdx = 4; + SubIdx = 4; break; } + return true; } } - return isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); + return false; } /// isFrameOperand - Return true and the FrameIndex if the specified diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 6ae7808e2dd..0ab85f4f45b 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -448,13 +448,15 @@ public: unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isCoalescableInstr - Return true if the instruction is "coalescable". That - /// is, it's like a copy where it's legal for the source to overlap the - /// destination. e.g. X86::MOVSX64rr32. - virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - + /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" + /// extension instruction. That is, it's like a copy where it's legal for the + /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns + /// true, then it's expected the pre-extension value is available as a subreg + /// of the result register. This also returns the sub-register index in + /// SubIdx. 
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll index 67e14ffae5e..9361a6f7294 100644 --- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll +++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58 +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57 ; PR2568 @g_3 = external global i16 ; [#uses=1] diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll new file mode 100644 index 00000000000..b2b9f8121fd --- /dev/null +++ b/test/CodeGen/X86/sext-subreg.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; rdar://7529457 + +define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { +; CHECK: t: +; CHECK: movslq %e{{.*}}, %rax +; CHECK: movq %rax +; CHECK: movl %eax + %C = add i64 %A, %B + %D = trunc i64 %C to i32 + volatile store i32 %D, i32* %P + %E = shl i64 %C, 32 + %F = ashr i64 %E, 32 + volatile store i64 %F, i64 *%P2 + volatile store i32 %D, i32* %P + ret i64 undef +} diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll index d7623920ff2..be9f3af308a 100644 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ b/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6 +; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 9 type { [62 x %struct.Bitvec*] } ; type %0 type { i8* 
} ; type %1 -- 2.34.1