From: Evan Cheng Date: Fri, 23 Jul 2010 22:39:59 +0000 (+0000) Subject: - Allow target to specify when register pressure is "too high". In most cases, X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3144687df78731ac4ddbc716a24b951678a73f57;p=oota-llvm.git - Allow target to specify when register pressure is "too high". In most cases, it's too late to start backing off aggressive latency scheduling when most of the registers are in use so the threshold should be a bit tighter. - Correctly handle live-outs, extract_subreg, etc. - Enable register pressure aware scheduling by default for hybrid scheduler. For ARM, this is almost always a win on # of instructions. It's runtime neutral for most of the tests. But for some kernels with high register pressure it can be a huge win. e.g. 464.h264ref reduced the number of spills by 54 and sped up by 20%. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109279 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 285c4be5bff..2d8838c520b 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -186,6 +186,14 @@ public: return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy]; } + /// getRegPressureLimit - Return the register pressure "high water mark" for + /// the specific register class. The scheduler is in high register pressure + /// mode (for the specific register class) if it goes over the limit. + virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + return 0; + } + /// isTypeLegal - Return true if the target has native support for the /// specified value type. This means that it has a register that directly /// holds it without promotions or expansions. 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 334ce58ac7c..2ffd35034a9 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -28,16 +28,12 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; -static cl::opt RegPressureAware("reg-pressure-aware-sched", - cl::init(false), cl::Hidden); - STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); @@ -1075,7 +1071,7 @@ namespace { std::fill(RegPressure.begin(), RegPressure.end(), 0); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1; + RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); } } @@ -1172,10 +1168,12 @@ namespace { SU->NodeQueueId = 0; } - bool HighRegPressure(const SUnit *SU) const { + bool HighRegPressure(const SUnit *SU, unsigned &Excess) const { if (!TLI) return false; + bool High = false; + Excess = 0; for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) @@ -1183,12 +1181,41 @@ namespace { SUnit *PredSU = I->getSUnit(); const SDNode *PN = PredSU->getNode(); if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyToReg) { - EVT VT = PN->getOperand(1).getValueType(); + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); unsigned Cost = TLI->getRepRegClassCostFor(VT); - if (RegLimit[RCId] < (RegPressure[RCId] + Cost)) - return true; + if ((RegPressure[RCId] + Cost) >= 
RegLimit[RCId]) { + High = true; + Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId]; + } + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) { + High = true; + Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId]; + } + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) { + High = true; + Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId]; } continue; } @@ -1201,12 +1228,14 @@ namespace { unsigned Cost = TLI->getRepRegClassCostFor(VT); // Check if this increases register pressure of the specific register // class to the point where it would cause spills. 
- if (RegLimit[RCId] < (RegPressure[RCId] + Cost)) - return true; + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) { + High = true; + Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId]; + } } } - return false; + return High; } void ScheduledNode(SUnit *SU) { @@ -1214,13 +1243,18 @@ namespace { return; const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) - return; - unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::COPY_TO_REGCLASS || - Opc == TargetOpcode::REG_SEQUENCE || - Opc == TargetOpcode::IMPLICIT_DEF) - return; + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { @@ -1231,8 +1265,8 @@ namespace { continue; const SDNode *PN = PredSU->getNode(); if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyToReg) { - EVT VT = PN->getOperand(1).getValueType(); + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); } @@ -1241,6 +1275,18 @@ namespace { unsigned POpc = PN->getMachineOpcode(); if (POpc == TargetOpcode::IMPLICIT_DEF) continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } unsigned NumDefs = 
TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { EVT VT = PN->getValueType(i); @@ -1251,19 +1297,19 @@ namespace { } } - if (!SU->NumSuccs) - return; - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) - // Register pressure tracking is imprecise. This can happen. - RegPressure[RCId] = 0; - else - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + if (SU->NumSuccs) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. 
+ RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } } dumpRegPressure(); @@ -1274,10 +1320,14 @@ namespace { return; const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) - return; + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::COPY_TO_REGCLASS || + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || Opc == TargetOpcode::REG_SEQUENCE || Opc == TargetOpcode::IMPLICIT_DEF) return; @@ -1291,8 +1341,8 @@ namespace { continue; const SDNode *PN = PredSU->getNode(); if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyToReg) { - EVT VT = PN->getOperand(1).getValueType(); + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); } @@ -1301,6 +1351,18 @@ namespace { unsigned POpc = PN->getMachineOpcode(); if (POpc == TargetOpcode::IMPLICIT_DEF) continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { EVT VT = PN->getValueType(i); @@ -1315,17 +1377,17 @@ namespace { } } - if (!SU->NumSuccs) - return; - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); - if (VT == MVT::Flag || VT == MVT::Other) - 
continue; - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + if (SU->NumSuccs) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } } dumpRegPressure(); @@ -1464,13 +1526,20 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ - bool LHigh = SPQ->HighRegPressure(left); - bool RHigh = SPQ->HighRegPressure(right); + unsigned LExcess, RExcess; + bool LHigh = SPQ->HighRegPressure(left, LExcess); + bool RHigh = SPQ->HighRegPressure(right, RExcess); if (LHigh && !RHigh) return true; else if (!LHigh && RHigh) return false; - else if (!LHigh && !RHigh) { + else if (LHigh && RHigh) { + if (LExcess > RExcess) + return true; + else if (LExcess < RExcess) + return false; + // Otherwise schedule for register pressure reduction. + } else { // Low register pressure situation, schedule for latency if possible. bool LStall = left->SchedulingPref == Sched::Latency && SPQ->getCurCycle() < left->getHeight(); @@ -1889,8 +1958,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { const TargetLowering *TLI = &IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = - new HybridBURRPriorityQueue(*IS->MF, RegPressureAware, TII, TRI, - (RegPressureAware ? 
TLI : 0)); + new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); PQ->setScheduleDAG(SD); return SD; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0e33758508f..1f9908c6f6c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -166,6 +166,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget(); + RegInfo = TM.getRegisterInfo(); if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. @@ -729,6 +730,23 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; } +unsigned +ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0; + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return 5 - FPDiff; + case ARM::GPRRegClassID: + return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0); + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
+ case ARM::DPRRegClassID: + return 32 - 10; + } +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 05d7d5f1cf1..b544b5eee2b 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -17,6 +17,7 @@ #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -268,6 +269,9 @@ namespace llvm { Sched::Preference getSchedulingPreference(SDNode *N) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + bool isShuffleMaskLegal(const SmallVectorImpl &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -285,6 +289,8 @@ namespace llvm { /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; + const TargetRegisterInfo *RegInfo; + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. /// unsigned ARMPCLabelIndex; diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index fc43ff48778..99eed79c866 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,14 +4,14 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. 
-; CHECK: vstr.32 s0, [r{{.*}}, #-128] -; CHECK: vstr.32 s0, [r{{.*}}, #-96] -; CHECK: vstr.32 s0, [r{{.*}}, #-64] -; CHECK: vstr.32 s0, [r{{.*}}, #-32] -; CHECK: vstr.32 s0, [r{{.*}}] -; CHECK: vstr.32 s0, [r{{.*}}, #32] -; CHECK: vstr.32 s0, [r{{.*}}, #64] -; CHECK: vstr.32 s0, [r{{.*}}, #96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96] target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"