X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMBaseRegisterInfo.cpp;h=3f79a9b53d704beff2874b6655f7039ee653aeec;hb=c212856f78499d318396a94186c4b0d876251fda;hp=79f975e3ae24604d539f6158e533b9821b59314a;hpb=e75bf036116c78b3f0a315d6146e13e85f1114ef;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 79f975e3ae2..3f79a9b53d7 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,24 +38,33 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#define DEBUG_TYPE "arm-register-info" + #define GET_REGINFO_TARGET_DESC #include "ARMGenRegisterInfo.inc" using namespace llvm; -ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), - FramePtr((STI.isTargetMachO() || STI.isThumb()) ? ARM::R7 : ARM::R11), - BasePtr(ARM::R6) { +ARMBaseRegisterInfo::ARMBaseRegisterInfo() + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} + +static unsigned getFramePointerReg(const ARMSubtarget &STI) { + if (STI.isTargetMachO()) { + if (STI.isTargetDarwin() || STI.isThumb1Only()) + return ARM::R7; + else + return ARM::R11; + } else if (STI.isTargetWindows()) + return ARM::R11; + else // ARM EABI + return STI.isThumb() ? ARM::R7 : ARM::R11; } -const uint16_t* +const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList - : CSR_AAPCS_SaveList; - - if (!MF) return RegList; + const ARMSubtarget &STI = MF->getSubtarget(); + const MCPhysReg *RegList = + STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; const Function *F = MF->getFunction(); if (F->getCallingConv() == CallingConv::GHC) { @@ -81,13 +90,14 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return RegList; } -const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +const uint32_t * +ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const ARMSubtarget &STI = MF.getSubtarget(); if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; + return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* @@ -95,8 +105,10 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } -const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { +const uint32_t * +ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const ARMSubtarget &STI = MF.getSubtarget(); // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i32 argument (which must also be the register used to return a @@ -107,14 +119,15 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { // should return NULL if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls - return NULL; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; + return nullptr; + return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask + : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); @@ -123,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::FPSCR); Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) - Reserved.set(FramePtr); + Reserved.set(getFramePointerReg(STI)); if (hasBasePointer(MF)) Reserved.set(BasePtr); // Some targets reserve R9. @@ -143,9 +156,9 @@ getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass* -ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) - const { +const TargetRegisterClass * +ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &) const { const TargetRegisterClass *Super = RC; TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { @@ -173,14 +186,15 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return 0; // Can't copy CCR registers. + return &ARM::rGPRRegClass; // Can't copy CCR registers. return RC; } unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); switch (RC->getID()) { default: @@ -231,11 +245,15 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, // This register should preferably be even (Odd == 0) or odd (Odd == 1). // Check if the other part of the pair has already been assigned, and provide // the paired register as the first hint. + unsigned Paired = Hint.second; + if (Paired == 0) + return; + unsigned PairedPhys = 0; - if (VRM && VRM->hasPhys(Hint.second)) { - PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this); - if (PairedPhys && MRI.isReserved(PairedPhys)) - PairedPhys = 0; + if (TargetRegisterInfo::isPhysicalRegister(Paired)) { + PairedPhys = Paired; + } else if (VRM && VRM->hasPhys(Paired)) { + PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this); } // First prefer the paired physreg. @@ -257,7 +275,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, } void -ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, +ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const { MachineRegisterInfo *MRI = &MF.getRegInfo(); std::pair Hint = MRI->getRegAllocationHint(Reg); @@ -270,39 +288,21 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, // change. unsigned OtherReg = Hint.second; Hint = MRI->getRegAllocationHint(OtherReg); - if (Hint.second == Reg) - // Make sure the pair has not already divorced. + // Make sure the pair has not already divorced. + if (Hint.second == Reg) { MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); - } -} - -bool -ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - // CortexA9 has a Write-after-write hazard for NEON registers. - if (!STI.isLikeA9()) - return false; - - switch (RC->getID()) { - case ARM::DPRRegClassID: - case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - case ARM::QPRRegClassID: - case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - case ARM::SPRRegClassID: - case ARM::SPR_8RegClassID: - // Avoid reusing S, D, and Q registers. - // Don't increase register pressure for QQ and QQQQ. - return true; - default: - return false; + if (TargetRegisterInfo::isVirtualRegister(NewReg)) + MRI->setRegAllocationHint(NewReg, + Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven + : ARMRI::RegPairOdd, OtherReg); + } } } bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -343,11 +343,11 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // Stack realignment requires a frame pointer. If we already started // register allocation with frame pointer elimination, it is too late now. - if (!MRI->canReserveReg(FramePtr)) + if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget()))) return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. @@ -358,11 +358,10 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); - bool requiresRealignment = - ((MFI->getMaxAlignment() > StackAlign) || - F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)); + unsigned StackAlign = + MF.getSubtarget().getFrameLowering()->getStackAlignment(); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttribute(Attribute::StackAlignment)); return requiresRealignment && canRealignStack(MF); } @@ -378,10 +377,11 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); if (TFI->hasFP(MF)) - return FramePtr; + return getFramePointerReg(STI); return ARM::SP; } @@ -395,7 +395,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -517,7 +517,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -533,21 +533,20 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // The incoming offset is relating to the SP at the start of the function, // but when we access the local it'll be relative to the SP after local // allocation, so adjust our SP-relative offset by that allocation size. - Offset = -Offset; Offset += MFI->getLocalFrameSize(); // Assume that we'll have at least some spill slots allocated. // FIXME: This is a total SWAG number. We should run some statistics // and pick a real one. Offset += 128; // 128 bytes of spill slots - // If there is a frame pointer, try using it. + // If there's a frame pointer and the addressing mode allows it, try using it. // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. unsigned StackAlign = TFI->getStackAlignment(); - if (TFI->hasFP(MF) && + if (TFI->hasFP(MF) && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { - if (isFrameOffsetLegal(MI, FPOffset)) + if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset)) return false; } // If we can reference via the stack pointer, try that. @@ -555,7 +554,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. - if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) + if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset)) return false; // The offset likely isn't legal, we want to allocate a virtual base register. @@ -570,7 +569,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : - (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri); MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -579,25 +578,23 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, const MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); - MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset)); + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(MIB); + AddDefaultCC(AddDefaultPred(MIB)); } -void -ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const { - MachineInstr &MI = *I; +void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); + *static_cast(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo(); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -620,7 +617,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, (void)Done; } -bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, +bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const { const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); @@ -664,7 +661,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, NumBits = 8; break; case ARMII::AddrModeT1_s: - NumBits = 5; + NumBits = (BaseReg == ARM::SP ? 8 : 5); Scale = 4; isSigned = false; break; @@ -696,9 +693,9 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - const ARMFrameLowering *TFI = - static_cast(MF.getTarget().getFrameLowering()); + *static_cast(MF.getSubtarget().getInstrInfo()); + const ARMFrameLowering *TFI = static_cast( + MF.getSubtarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -765,3 +762,60 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } + +bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const { + auto MBB = MI->getParent(); + auto MF = MBB->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + // If not copying into a sub-register this should be ok because we shouldn't + // need to split the reg. + if (!DstSubReg) + return true; + // Small registers don't frequently cause a problem, so we can coalesce them. + if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + return true; + + auto NewRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC); + auto SrcRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC); + auto DstRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC); + // If the source register class is more expensive than the destination, the + // coalescing is probably profitable. + if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + if (DstRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + + // If the register allocator isn't constrained, we can always allow coalescing + // unfortunately we don't know yet if we will be constrained. + // The goal of this heuristic is to restrict how many expensive registers + // we allow to coalesce in a given basic block. + auto AFI = MF->getInfo(); + auto It = AFI->getCoalescedWeight(MBB); + + DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: " + << It->second << "\n"); + DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: " + << NewRCWeight.RegWeight << "\n"); + + // This number is the largest round number that which meets the criteria: + // (1) addresses PR18825 + // (2) generates better code in some test cases (like vldm-shed-a9.ll) + // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC) + // In practice the SizeMultiplier will only factor in for straight line code + // that uses a lot of NEON vectors, which isn't terribly common. + unsigned SizeMultiplier = MBB->size()/100; + SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1; + if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) { + It->second += NewRCWeight.RegWeight; + return true; + } + return false; +}