X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FScheduleDAGInstrs.cpp;h=c8328ad4911f573ba9d2dffd1875594338142734;hb=3bfc4d8e13edd83534b733f0f1de5b1d5f6bf828;hp=17d31f48b1d0468663e870d499ef1ada4533ed69;hpb=851bb2c9cbbd3b1847def5ca7ea8dadf457298b5;p=oota-llvm.git diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 17d31f48b1d..c8328ad4911 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -44,13 +45,24 @@ static cl::opt EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI GAD construction")); +// FIXME: Enable the use of TBAA. There are two known issues preventing this: +// 1. Stack coloring does not update TBAA when merging allocas +// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations. +// Because BasicAA does not handle inttoptr, we'll often miss basic type +// punning idioms that we need to catch so we don't miscompile real-world +// code. +static cl::opt UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, + cl::init(false), cl::desc("Enable use of TBAA during MI GAD construction")); + ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt, bool IsPostRAFlag, + bool RemoveKillFlags, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) { + IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), + CanHandleTerminators(false), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -136,31 +148,32 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, if (!V) return; + if (const PseudoSourceValue *PSV = dyn_cast(V)) { + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (!PSV->isAliased(MFI)) { + bool MayAlias = PSV->mayAlias(MFI); + Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); + } + return; + } + SmallVector Objs; getUnderlyingObjects(V, Objs); for (SmallVectorImpl::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { - bool MayAlias = true; V = *I; - if (const PseudoSourceValue *PSV = dyn_cast(V)) { - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - - if (PSV->isAliased(MFI)) { - Objects.clear(); - return; - } + assert(!isa(V) && "Underlying value is a stack slot!"); - MayAlias = PSV->mayAlias(MFI); - } else if (!isIdentifiedObject(V)) { + if (!isIdentifiedObject(V)) { Objects.clear(); return; } - Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); } } @@ -185,9 +198,6 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, RegionBegin = begin; RegionEnd = end; NumRegionInstrs = regioninstrs; - MISUnitMap.clear(); - - ScheduleDAG::clearDAG(); } /// Close the current scheduling region. Don't clear any state in case the @@ -287,8 +297,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { /// this SUnit to following instructions in the same scheduling region that /// depend the physical register referenced at OperIdx. void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - const MachineOperand &MO = MI->getOperand(OperIdx); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue @@ -326,6 +336,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + if (RemoveKillFlags) + MO.setIsKill(false); } else { addPhysRegDataDeps(SU, OperIdx); @@ -408,11 +420,18 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { unsigned Reg = MI->getOperand(OperIdx).getReg(); // Record this local VReg use. - VRegUses.insert(VReg2SUnit(Reg, SU)); + VReg2UseMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, SU)); // Lookup this operand's reaching definition. assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + LiveQueryResult LRQ + = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); VNInfo *VNI = LRQ.valueIn(); // VNI will be valid because MachineOperand::readsReg() is checked by caller. @@ -503,6 +522,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (MIa == MIb) return false; + // FIXME: Need to handle multiple memory operands to support all targets. + if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) + return true; + if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) return true; @@ -518,10 +541,6 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, MachineMemOperand *MMOa = *MIa->memoperands_begin(); MachineMemOperand *MMOb = *MIb->memoperands_begin(); - // FIXME: Need to handle multiple memory operands to support all targets. - if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) - llvm_unreachable("Multiple memory operands."); - // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -546,10 +565,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; AliasAnalysis::AliasResult AAResult = AA->alias( - AliasAnalysis::Location(MMOa->getValue(), Overlapa, - MMOa->getTBAAInfo()), - AliasAnalysis::Location(MMOb->getValue(), Overlapb, - MMOb->getTBAAInfo())); + AliasAnalysis::Location(MMOa->getValue(), Overlapa, + UseTBAA ? MMOa->getTBAAInfo() : 0), + AliasAnalysis::Location(MMOb->getValue(), Overlapb, + UseTBAA ? MMOb->getTBAAInfo() : 0)); return (AAResult != AliasAnalysis::NoAlias); } @@ -683,22 +702,51 @@ void ScheduleDAGInstrs::initSUnits() { // Assign the Latency field of SU using target-provided information. SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); + + // If this SUnit uses an unbuffered resource, mark it as such. + // These resources are used for in-order execution pipelines within an + // out-of-order core and are identified by BufferSize=1. BufferSize=0 is + // used for dispatch/issue groups and is not considered here. + if (SchedModel.hasInstrSchedModel()) { + const MCSchedClassDesc *SC = getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + case 0: + SU->hasReservedResource = true; + break; + case 1: + SU->isUnbuffered = true; + break; + default: + break; + } + } + } } } -/// If RegPressure is non null, compute register pressure as a side effect. The +/// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, - RegPressureTracker *RPTracker) { + RegPressureTracker *RPTracker, + PressureDiffs *PDiffs) { const TargetSubtargetInfo &ST = TM.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : 0; + MISUnitMap.clear(); + ScheduleDAG::clearDAG(); + // Create an SUnit for each real instruction. initSUnits(); + if (PDiffs) + PDiffs->init(SUnits.size()); + // We build scheduling units by walking a block's instruction list from bottom // to top. @@ -709,7 +757,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // so that they can be given more precise dependencies. We track // separately the known memory locations that may alias and those // that are known not to alias - MapVector AliasMemDefs, NonAliasMemDefs; + MapVector > AliasMemDefs, NonAliasMemDefs; MapVector > AliasMemUses, NonAliasMemUses; std::set RejectMemNodes; @@ -736,7 +784,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MachineInstr *DbgMI = NULL; for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { - MachineInstr *MI = prior(MII); + MachineInstr *MI = std::prev(MII); if (MI && DbgMI) { DbgValues.push_back(std::make_pair(DbgMI, MI)); DbgMI = NULL; @@ -746,16 +794,19 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, DbgMI = MI; continue; } + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); + if (RPTracker) { - RPTracker->recede(); - assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; + RPTracker->recede(/*LiveUses=*/0, PDiff); + assert(RPTracker->getPos() == std::prev(MII) && + "RPTracker can't find MI"); } - assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && - "Cannot schedule terminators or labels!"); - - SUnit *SU = MISUnitMap[MI]; - assert(SU && "No SUnit mapped to this MI"); + assert( + (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && + "Cannot schedule terminators or labels!"); // Add register-based dependencies (data, anti, and output). bool HasVRegDef = false; @@ -803,9 +854,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (isGlobalMemoryObject(AA, MI)) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. - for (MapVector::iterator I = + for (MapVector >::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { - I->second->addPred(SDep(SU, SDep::Barrier)); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) { + I->second[i]->addPred(SDep(SU, SDep::Barrier)); + } } for (MapVector >::iterator I = NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { @@ -841,9 +894,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); - for (MapVector::iterator I = AliasMemDefs.begin(), - E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); + for (MapVector >::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); + } for (MapVector >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) @@ -875,19 +930,29 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is // an existing def. - MapVector::iterator I = + MapVector >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector::iterator IE = + MapVector >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, - 0, true); - I->second = SU; + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, + 0, true); + + // If we're not using AA, then we only need one store per object. + if (!AAForDep) + I->second.clear(); + I->second.push_back(SU); } else { - if (ThisMayAlias) - AliasMemDefs[V] = SU; - else - NonAliasMemDefs[V] = SU; + if (ThisMayAlias) { + if (!AAForDep) + AliasMemDefs[V].clear(); + AliasMemDefs[V].push_back(SU); + } else { + if (!AAForDep) + NonAliasMemDefs[V].clear(); + NonAliasMemDefs[V].push_back(SU); + } } // Handle the uses in MemUses, if there are any. MapVector >::iterator J = @@ -937,9 +1002,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (Objs.empty()) { // A load with no underlying object. Depend on all // potentially aliasing stores. - for (MapVector::iterator I = + for (MapVector >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], + RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -956,13 +1023,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MayAlias = true; // A load from a specific PseudoSourceValue. Add precise dependencies. - MapVector::iterator I = + MapVector >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector::iterator IE = + MapVector >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, - 0, true); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], + RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else @@ -987,6 +1055,145 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, PendingLoads.clear(); } +/// \brief Initialize register live-range state for updating kills. +void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { + // Start with no live registers. + LiveRegs.reset(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + // Repeat, for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (LiveRegs.test(MO.getReg())) { + MO.setIsKill(false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. + MO.setIsKill(false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + MachineInstrBuilder MIB(MF, MI); + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MIB.addReg(*SubRegs, RegState::ImplicitDefine); + AllDead = false; + } + } + + if(AllDead) + MO.setIsKill(true); + return false; +} + +// FIXME: Reuse the LivePhysRegs utility for this. +void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); + + LiveRegs.resize(TRI->getNumRegs()); + BitVector killedRegs(TRI->getNumRegs()); + + startBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + // Repeat for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. Don't set kill flags on undef operands. + killedRegs.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + bool kill = false; + if (!killedRegs.test(Reg)) { + kill = true; + // A register is not killed if any subregs are live... + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = !LiveRegs.test(Reg); + } + + if (MO.isKill() != kill) { + DEBUG(dbgs() << "Fixing " << MO << " in "); + // Warning: toggleKillFlag may invalidate MO. + toggleKillFlag(MI, MO); + DEBUG(MI->dump()); + } + + killedRegs.set(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SU->getInstr()->dump(); @@ -1222,7 +1429,7 @@ public: const SDep *backtrack() { DFSStack.pop_back(); - return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second); + return DFSStack.empty() ? 0 : std::prev(DFSStack.back().second); } const SUnit *getCurr() const { return DFSStack.back().first; }