X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FTwoAddressInstructionPass.cpp;h=6d6244e4f879a9f10f90b46679d3860fa71e6f86;hb=2b33f4cbad43dcaca944d02a6ea67991ff9db9cf;hp=c288ae0816d0b3979d28737bc5b21ed51e9cda90;hpb=f1250eeadf39ed34c852d43de6c6780d5d5176dc;p=oota-llvm.git diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index c288ae0816d..6d6244e4f87 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -40,6 +41,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -77,6 +79,10 @@ namespace { // registers from virtual registers. e.g. r1 = move v1024. DenseMap DstRegMap; + /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen + /// during the initial walk of the machine function. + SmallVector RegSequences; + bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI, unsigned Reg, MachineBasicBlock::iterator OldPos); @@ -99,12 +105,12 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist); - bool isProfitableToConv3Addr(unsigned RegA); + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned RegB, unsigned Dist); + unsigned RegA, unsigned RegB, unsigned Dist); typedef std::pair, MachineInstr*> NewKill; bool canUpdateDeletedKills(SmallVector &Kills, @@ -118,14 +124,27 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist); + unsigned Dist, + SmallPtrSet &Processed); + + void ScanUses(unsigned DstReg, MachineBasicBlock *MBB, + SmallPtrSet &Processed); void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &Processed); + void CoalesceExtSubRegs(SmallVector &Srcs, unsigned DstReg); + + /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part + /// of the de-ssa process. This replaces sources of REG_SEQUENCE as + /// sub-register references of the register defined by REG_SEQUENCE. + bool EliminateRegSequences(); + public: static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(&ID) {} + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -133,10 +152,7 @@ namespace { AU.addPreserved(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - if (StrongPHIElim) - AU.addPreservedID(StrongPHIEliminationID); - else - AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -146,10 +162,13 @@ namespace { } char TwoAddressInstructionPass::ID = 0; -static RegisterPass -X("twoaddressinstruction", "Two-Address instruction pass"); +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) -const PassInfo *const llvm::TwoAddressInstructionPassID = &X; +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; /// Sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it @@ -261,8 +280,8 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, /// isTwoAddrUse - Return true if the specified MI is using the specified /// register as a two-address operand. static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const TargetInstrDesc &TID = UseMI->getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MCInstrDesc &MCID = UseMI->getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(i); if (MO.isReg() && MO.getReg() == Reg && (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) @@ -367,26 +386,18 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, bool &IsSrcPhys, bool &IsDstPhys) { SrcReg = 0; DstReg = 0; - unsigned SrcSubIdx, DstSubIdx; - if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.isExtractSubreg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - } else if (MI.isInsertSubreg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - } else if (MI.isSubregToReg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - } - } + if (MI.isCopy()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + } else if (MI.isInsertSubreg() || MI.isSubregToReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + } else + return false; - if (DstReg) { - IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - return true; - } - return false; + IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return true; } /// isKilled - Test if the given register value, which is used by the given @@ -432,8 +443,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands(); + const MCInstrDesc &MCID = MI.getDesc(); + unsigned NumOps = MI.isInlineAsm() + ? MI.getNumOperands() : MCID.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) @@ -543,8 +555,9 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, unsigned FromRegC = getMappedReg(regC, SrcRegMap); unsigned ToRegB = getMappedReg(regB, DstRegMap); unsigned ToRegC = getMappedReg(regC, DstRegMap); - if (!regsAreCompatible(FromRegB, ToRegB, TRI) && - (regsAreCompatible(FromRegB, ToRegC, TRI) || + if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) && + ((!FromRegC && !ToRegC) || + regsAreCompatible(FromRegB, ToRegC, TRI) || regsAreCompatible(FromRegC, ToRegB, TRI))) return true; @@ -607,16 +620,18 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, /// isProfitableToConv3Addr - Return true if it is profitable to convert the /// given 2-address instruction to a 3-address one. bool -TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) { +TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: // %reg1024 = MOV r1 // %reg1025 = MOV r0 // %reg1026 = ADD %reg1024, %reg1025 // r2 = MOV %reg1026 // Turn ADD into a 3-address instruction to avoid a copy. - unsigned FromRegA = getMappedReg(RegA, SrcRegMap); + unsigned FromRegB = getMappedReg(RegB, SrcRegMap); + if (!FromRegB) + return false; unsigned ToRegA = getMappedReg(RegA, DstRegMap); - return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI)); + return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } /// ConvertInstTo3Addr - Convert the specified two-address instruction into a @@ -625,7 +640,8 @@ bool TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned RegB, unsigned Dist) { + unsigned RegA, unsigned RegB, + unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); @@ -645,12 +661,64 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, mi = NewMI; nmi = llvm::next(mi); } + + // Update source and destination register maps. + SrcRegMap.erase(RegA); + DstRegMap.erase(RegB); return true; } return false; } +/// ScanUses - Scan forward recursively for only uses, update maps if the use +/// is a copy or a two-address instruction. +void +TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, + SmallPtrSet &Processed) { + SmallVector VirtRegPairs; + bool IsDstPhys; + bool IsCopy = false; + unsigned NewReg = 0; + unsigned Reg = DstReg; + while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, + NewReg, IsDstPhys)) { + if (IsCopy && !Processed.insert(UseMI)) + break; + + DenseMap::iterator DI = DistanceMap.find(UseMI); + if (DI != DistanceMap.end()) + // Earlier in the same MBB.Reached via a back edge. + break; + + if (IsDstPhys) { + VirtRegPairs.push_back(NewReg); + break; + } + bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second; + if (!isNew) + assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!"); + VirtRegPairs.push_back(NewReg); + Reg = NewReg; + } + + if (!VirtRegPairs.empty()) { + unsigned ToReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + while (!VirtRegPairs.empty()) { + unsigned FromReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!"); + ToReg = FromReg; + } + bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!"); + } +} + /// ProcessCopy - If the specified instruction is not yet processed, process it /// if it's a copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in @@ -682,49 +750,11 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, assert(SrcRegMap[DstReg] == SrcReg && "Can't map to two src physical registers!"); - SmallVector VirtRegPairs; - bool IsCopy = false; - unsigned NewReg = 0; - while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII, - IsCopy, NewReg, IsDstPhys)) { - if (IsCopy) { - if (!Processed.insert(UseMI)) - break; - } - - DenseMap::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end()) - // Earlier in the same MBB.Reached via a back edge. - break; - - if (IsDstPhys) { - VirtRegPairs.push_back(NewReg); - break; - } - bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second; - if (!isNew) - assert(SrcRegMap[NewReg] == DstReg && - "Can't map to two src physical registers!"); - VirtRegPairs.push_back(NewReg); - DstReg = NewReg; - } - - if (!VirtRegPairs.empty()) { - unsigned ToReg = VirtRegPairs.back(); - VirtRegPairs.pop_back(); - while (!VirtRegPairs.empty()) { - unsigned FromReg = VirtRegPairs.back(); - VirtRegPairs.pop_back(); - bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; - if (!isNew) - assert(DstRegMap[FromReg] == ToReg && - "Can't map to two dst physical registers!"); - ToReg = FromReg; - } - } + ScanUses(DstReg, MBB, Processed); } Processed.insert(MI); + return; } /// isSafeToDelete - If the specified instruction does not produce any side @@ -732,10 +762,10 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector &Kills) { - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayStore() || TID.isCall()) + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayStore() || MCID.isCall()) return false; - if (TID.isTerminator() || TID.hasUnmodeledSideEffects()) + if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -768,7 +798,7 @@ canUpdateDeletedKills(SmallVector &Kills, if (!LastKill) return false; - bool isModRef = LastKill->modifiesRegister(Kill); + bool isModRef = LastKill->definesRegister(Kill); NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), LastKill)); } @@ -823,8 +853,9 @@ bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { - const TargetInstrDesc &TID = mi->getDesc(); + unsigned SrcIdx, unsigned DstIdx, unsigned Dist, + SmallPtrSet &Processed) { + const MCInstrDesc &MCID = mi->getDesc(); unsigned regA = mi->getOperand(DstIdx).getReg(); unsigned regB = mi->getOperand(SrcIdx).getReg(); @@ -846,7 +877,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (TID.isCommutable() && mi->getNumOperands() >= 3 && + if (MCID.isCommutable() && mi->getNumOperands() >= 3 && TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; @@ -874,17 +905,122 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } - if (TID.isConvertibleTo3Addr()) { + if (TargetRegisterInfo::isVirtualRegister(regA)) + ScanUses(regA, &*mbbi, Processed); + + if (MCID.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. - if (!regBKilled || isProfitableToConv3Addr(regA)) { + if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { // Try to convert it. - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { + if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { ++NumConvertedTo3Addr; return true; // Done with this instruction. } } } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. + if (MCID.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); + if (UnfoldMCID.getNumDefs() == 1) { + MachineFunction &MF = *mbbi->getParent(); + + // Unfold the load. + DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + const TargetRegisterClass *RC = + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector NewMIs; + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + mbbi->insert(mi, NewMIs[0]); + mbbi->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. + unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformSuccess = + TryInstructionTransform(NewMI, mi, mbbi, + NewSrcIdx, NewDstIdx, Dist, Processed); + if (TransformSuccess || + NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. + if (LV) { + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + mi->eraseFromParent(); + mi = NewMIs[1]; + if (TransformSuccess) + return true; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + return false; } @@ -906,8 +1042,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. - BitVector ReMatRegs; - ReMatRegs.resize(MRI->getLastVirtReg()+1); + BitVector ReMatRegs(MRI->getNumVirtRegs()); typedef DenseMap, 4> > TiedOperandMap; @@ -929,7 +1064,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { continue; } - const TargetInstrDesc &TID = mi->getDesc(); + // Remember REG_SEQUENCE instructions, we'll deal with them later. + if (mi->isRegSequence()) + RegSequences.push_back(&*mi); + + const MCInstrDesc &MCID = mi->getDesc(); bool FirstTied = true; DistanceMap.insert(std::make_pair(mi, ++Dist)); @@ -939,7 +1078,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. unsigned NumOps = mi->isInlineAsm() - ? mi->getNumOperands() : TID.getNumOperands(); + ? mi->getNumOperands() : MCID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) @@ -957,12 +1096,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { "two address instruction invalid"); unsigned regB = mi->getOperand(SrcIdx).getReg(); - TiedOperandMap::iterator OI = TiedOperands.find(regB); - if (OI == TiedOperands.end()) { - SmallVector, 4> TiedPair; - OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; - } - OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); } // Now iterate over the information collected above. @@ -982,10 +1116,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi->getOperand(DstIdx).getReg()) break; // Done with this instruction. - if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) break; // The tied operands have been eliminated. } + bool IsEarlyClobber = false; bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; @@ -993,7 +1129,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; - unsigned regA = mi->getOperand(DstIdx).getReg(); + + const MachineOperand &DstMO = mi->getOperand(DstIdx); + unsigned regA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + // Grab regB from the instruction because it may have changed if the // instruction was commuted. regB = mi->getOperand(SrcIdx).getReg(); @@ -1031,13 +1171,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); - ReMatRegs.set(regB); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); + ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); ++NumReMats; } else { - bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc); - (void)Emitted; - assert(Emitted && "Unable to issue a copy instruction!\n"); + BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), + regA).addReg(regB); } MachineBasicBlock::iterator prevMI = prior(mi); @@ -1058,15 +1197,17 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } if (AllUsesCopied) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); } - MO.setReg(LastCopiedReg); } } @@ -1087,12 +1228,30 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } } - + + // Schedule the source copy / remat inserted to form two-address + // instruction. FIXME: Does it matter the distance map may not be + // accurate after it's scheduled? + TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); + MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } + // Clear TiedOperands here instead of at the top of the loop // since most instructions do not have tied operands. TiedOperands.clear(); @@ -1101,14 +1260,271 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } // Some remat'ed instructions are dead. - int VReg = ReMatRegs.find_first(); - while (VReg != -1) { + for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(i); if (MRI->use_nodbg_empty(VReg)) { MachineInstr *DefMI = MRI->getVRegDef(VReg); DefMI->eraseFromParent(); } - VReg = ReMatRegs.find_next(VReg); } + // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve + // SSA form. It's now safe to de-SSA. + MadeChange |= EliminateRegSequences(); + return MadeChange; } + +static void UpdateRegSequenceSrcs(unsigned SrcReg, + unsigned DstReg, unsigned SubIdx, + MachineRegisterInfo *MRI, + const TargetRegisterInfo &TRI) { + for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), + RE = MRI->reg_end(); RI != RE; ) { + MachineOperand &MO = RI.getOperand(); + ++RI; + MO.substVirtReg(DstReg, SubIdx, TRI); + } +} + +/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are +/// EXTRACT_SUBREG from the same register and to the same virtual register +/// with different sub-register indices, attempt to combine the +/// EXTRACT_SUBREGs and pre-coalesce them. e.g. +/// %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 +/// %reg1029:6 = EXTRACT_SUBREG %reg1026, 6 +/// %reg1029:5 = EXTRACT_SUBREG %reg1026, 5 +/// Since D subregs 5, 6 can combine to a Q register, we can coalesce +/// reg1026 to reg1029. +void +TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs, + unsigned DstReg) { + SmallSet Seen; + for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { + unsigned SrcReg = Srcs[i]; + if (!Seen.insert(SrcReg)) + continue; + + // Check that the instructions are all in the same basic block. + MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); + if (SrcDefMI->getParent() != DstDefMI->getParent()) + continue; + + // If there are no other uses than copies which feed into + // the reg_sequence, then we might be able to coalesce them. + bool CanCoalesce = true; + SmallVector SrcSubIndices, DstSubIndices; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { + CanCoalesce = false; + break; + } + SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); + DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); + } + + if (!CanCoalesce || SrcSubIndices.size() < 2) + continue; + + // Check that the source subregisters can be combined. + std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); + unsigned NewSrcSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, + NewSrcSubIdx)) + continue; + + // Check that the destination subregisters can also be combined. + std::sort(DstSubIndices.begin(), DstSubIndices.end()); + unsigned NewDstSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, + NewDstSubIdx)) + continue; + + // If neither source nor destination can be combined to the full register, + // just give up. This could be improved if it ever matters. + if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) + continue; + + // Now that we know that all the uses are extract_subregs and that those + // subregs can somehow be combined, scan all the extract_subregs again to + // make sure the subregs are in the right order and can be composed. + MachineInstr *SomeMI = 0; + CanCoalesce = true; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + assert(UseMI->isCopy()); + unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); + unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); + assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); + if ((NewDstSubIdx == 0 && + TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || + (NewSrcSubIdx == 0 && + TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { + CanCoalesce = false; + break; + } + // Keep track of one of the uses. + SomeMI = UseMI; + } + if (!CanCoalesce) + continue; + + // Insert a copy to replace the original. + MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, + SomeMI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(SrcReg, 0, NewSrcSubIdx); + + // Remove all the old extract instructions. + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ) { + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI == CopyMI) + continue; + assert(UseMI->isCopy()); + // Move any kills to the new copy or extract instruction. + if (UseMI->getOperand(1).isKill()) { + CopyMI->getOperand(1).setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); + } + UseMI->eraseFromParent(); + } + } +} + +static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, + MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI != RegSeq && UseMI->isRegSequence()) + return true; + } + return false; +} + +/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part +/// of the de-ssa process. This replaces sources of REG_SEQUENCE as +/// sub-register references of the register defined by REG_SEQUENCE. e.g. +/// +/// %reg1029, %reg1030 = VLD1q16 %reg1024, ... +/// %reg1031 = REG_SEQUENCE %reg1029, 5, %reg1030, 6 +/// => +/// %reg1031:5, %reg1031:6 = VLD1q16 %reg1024, ... +bool TwoAddressInstructionPass::EliminateRegSequences() { + if (RegSequences.empty()) + return false; + + for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) { + MachineInstr *MI = RegSequences[i]; + unsigned DstReg = MI->getOperand(0).getReg(); + if (MI->getOperand(0).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(DstReg) || + !(MI->getNumOperands() & 1)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + bool IsImpDef = true; + SmallVector RealSrcs; + SmallSet Seen; + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SubIdx = MI->getOperand(i+1).getImm(); + if (MI->getOperand(i).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isImplicitDef()) { + DefMI->eraseFromParent(); + continue; + } + IsImpDef = false; + + // Remember COPY sources. These might be candidate for coalescing. + if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) + RealSrcs.push_back(DefMI->getOperand(1).getReg()); + + bool isKill = MI->getOperand(i).isKill(); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || + !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), + MRI->getRegClass(SrcReg), SubIdx)) { + // REG_SEQUENCE cannot have duplicated operands, add a copy. + // Also add an copy if the source is live-in the block. We don't want + // to end up with a partial-redef of a livein, e.g. + // BB0: + // reg1051:10 = + // ... + // BB1: + // ... = reg1051:10 + // BB2: + // reg1051:9 = + // LiveIntervalAnalysis won't like it. + // + // If the REG_SEQUENCE doesn't kill its source, keeping live variables + // correctly up to date becomes very difficult. Insert a copy. + + // Defer any kill flag to the last operand using SrcReg. Otherwise, we + // might insert a COPY that uses SrcReg after is was killed. + if (isKill) + for (unsigned j = i + 2; j < e; j += 2) + if (MI->getOperand(j).getReg() == SrcReg) { + MI->getOperand(j).setIsKill(); + isKill = false; + break; + } + + MachineBasicBlock::iterator InsertLoc = MI; + MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, + MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, SubIdx) + .addReg(SrcReg, getKillRegState(isKill)); + MI->getOperand(i).setReg(0); + if (LV && isKill) + LV->replaceKillInstruction(SrcReg, MI, CopyMI); + DEBUG(dbgs() << "Inserted: " << *CopyMI); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (!SrcReg) continue; + unsigned SubIdx = MI->getOperand(i+1).getImm(); + UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); + } + + if (IsImpDef) { + DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) + MI->RemoveOperand(j); + } else { + DEBUG(dbgs() << "Eliminated: " << *MI); + MI->eraseFromParent(); + } + + // Try coalescing some EXTRACT_SUBREG instructions. This can create + // INSERT_SUBREG instructions that must have flags added by + // LiveIntervalAnalysis, so only run it when LiveVariables is available. + if (LV) + CoalesceExtSubRegs(RealSrcs, DstReg); + } + + RegSequences.clear(); + return true; +}