From: Hal Finkel Date: Tue, 9 Apr 2013 17:27:09 +0000 (+0000) Subject: Use virtual base registers on PPC X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f6f8198d85f278ff03aaf32c9db6ae0b3826395c;p=oota-llvm.git Use virtual base registers on PPC On PowerPC, non-vector loads and stores have r+i forms; however, in functions with large stack frames these were not being used to access slots far from the stack pointer because such slots were out of range for the signed 16-bit immediate offset field. This increases register pressure because we need a separate register for each offset (when the r+r form is used). By enabling virtual base registers, we can deal with large stack frames without unduly increasing register pressure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179105 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 3f5f6c26ffa..2be6324fd7b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -454,6 +454,33 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } +// Figure out if the offset in the instruction is shifted right two bits. This +// is true for instructions like "STD", which the machine implicitly adds two +// low zeros to. +static bool usesIXAddr(const MachineInstr &MI) { + unsigned OpC = MI.getOpcode(); + + switch (OpC) { + default: + return false; + case PPC::LWA: + case PPC::LD: + case PPC::STD: + return true; + } +} + +// Return the OffsetOperandNo given the FIOperandNum (and the instruction). +static unsigned getOffsetONFromFION(const MachineInstr &MI, + unsigned FIOperandNum) { + // Take into account whether it's an add or mem instruction + unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; + if (MI.isInlineAsm()) + OffsetOperandNo = FIOperandNum-1; + + return OffsetOperandNo; +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, @@ -471,10 +498,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - // Take into account whether it's an add or mem instruction - unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; - if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNum-1; + unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); // Get the frame index. int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -516,17 +540,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, (is64Bit ? PPC::X1 : PPC::R1), false); - // Figure out if the offset in the instruction is shifted right two bits. This - // is true for instructions like "STD", which the machine implicitly adds two - // low zeros to. - bool isIXAddr = false; - switch (OpC) { - case PPC::LWA: - case PPC::LD: - case PPC::STD: - isIXAddr = true; - break; - } + // Figure out if the offset in the instruction is shifted right two bits. + bool isIXAddr = usesIXAddr(MI); // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). @@ -618,3 +633,124 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const { unsigned PPCRegisterInfo::getEHHandlerRegister() const { return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; } + +/// Returns true if the instruction's frame index +/// reference would be better served by a base register other than FP +/// or SP. Used by LocalStackFrameAllocation to determine which frame index +/// references it should create new base registers for. +bool PPCRegisterInfo:: +needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + assert(Offset < 0 && "Local offset must be negative"); + + unsigned FIOperandNum = 0; + while (!MI->getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); + + if (!usesIXAddr(*MI)) + Offset += MI->getOperand(OffsetOperandNo).getImm(); + else + Offset += MI->getOperand(OffsetOperandNo).getImm() << 2; + + // It's the load/store FI references that cause issues, as it can be difficult + // to materialize the offset if it won't fit in the literal field. Estimate + // based on the size of the local frame and some conservative assumptions + // about the rest of the stack frame (note, this is pre-regalloc, so + // we don't know everything for certain yet) whether this offset is likely + // to be out of range of the immediate. Return true if so. + + // We only generate virtual base registers for loads and stores that have + // an r+i form. Return false for everything else. + unsigned OpC = MI->getOpcode(); + if (!ImmToIdxMap.count(OpC)) + return false; + + // Don't generate a new virtual base register just to add zero to it. + if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) && + MI->getOperand(2).getImm() == 0) + return false; + + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + const PPCFrameLowering *PPCFI = + static_cast(MF.getTarget().getFrameLowering()); + unsigned StackEst = + PPCFI->determineFrameLayout(MF, false, true); + + // If we likely don't need a stack frame, then we probably don't need a + // virtual base register either. + if (!StackEst) + return false; + + // Estimate an offset from the stack pointer. + // The incoming offset is relating to the SP at the start of the function, + // but when we access the local it'll be relative to the SP after local + // allocation, so adjust our SP-relative offset by that allocation size. + Offset += StackEst; + + // The frame pointer will point to the end of the stack, so estimate the + // offset as the difference between the object offset and the FP location. + return !isFrameOffsetLegal(MI, Offset); +} + +/// Insert defining instruction(s) for BaseReg to +/// be a pointer to FrameIdx at the beginning of the basic block. +void PPCRegisterInfo:: +materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const { + unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI; + + MachineBasicBlock::iterator Ins = MBB->begin(); + DebugLoc DL; // Defaults to "unknown" + if (Ins != MBB->end()) + DL = Ins->getDebugLoc(); + + const MCInstrDesc &MCID = TII.get(ADDriOpc); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const MachineFunction &MF = *MBB->getParent(); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); + + BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); +} + +void +PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + + unsigned FIOperandNum = 0; + while (!MI.getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false); + unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); + + bool isIXAddr = usesIXAddr(MI); + if (!isIXAddr) + Offset += MI.getOperand(OffsetOperandNo).getImm(); + else + Offset += MI.getOperand(OffsetOperandNo).getImm() << 2; + + // Figure out if the offset in the instruction is shifted right two bits. + if (isIXAddr) + Offset >>= 2; // The actual encoded value has the low two bits zero. + + MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); +} + +bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { + return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); +} + diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 7e6683eeb2e..7a48b4b9799 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -61,6 +61,10 @@ public: return true; } + virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const { + return true; + } + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; @@ -77,6 +81,15 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; + // Support for virtual base registers. + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + void materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; + bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; + // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll index 40f46fda468..ba5c8c172f1 100644 --- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll +++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll @@ -1,5 +1,9 @@ ; RUN: llc < %s -march=ppc64 | FileCheck %s +; Temporarily XFAIL this test until LSA stops creating single-use +; virtual base registers. +; XFAIL: * + %struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] } %struct.__mutex_t = type { i32 } %struct.anon = type { i64, i64 } diff --git a/test/CodeGen/PowerPC/lsa.ll b/test/CodeGen/PowerPC/lsa.ll new file mode 100644 index 00000000000..3daeccbf40f --- /dev/null +++ b/test/CodeGen/PowerPC/lsa.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define signext i32 @foo() #0 { +entry: + %v = alloca [8200 x i32], align 4 + %w = alloca [8200 x i32], align 4 + %q = alloca [8200 x i32], align 4 + %0 = bitcast [8200 x i32]* %v to i8* + call void @llvm.lifetime.start(i64 32800, i8* %0) #0 + %1 = bitcast [8200 x i32]* %w to i8* + call void @llvm.lifetime.start(i64 32800, i8* %1) #0 + %2 = bitcast [8200 x i32]* %q to i8* + call void @llvm.lifetime.start(i64 32800, i8* %2) #0 + %arraydecay = getelementptr inbounds [8200 x i32]* %q, i64 0, i64 0 + %arraydecay1 = getelementptr inbounds [8200 x i32]* %v, i64 0, i64 0 + %arraydecay2 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 0 + call void @bar(i32* %arraydecay, i32* %arraydecay1, i32* %arraydecay2) #0 + %3 = load i32* %arraydecay2, align 4, !tbaa !0 + %arrayidx3 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 1 + %4 = load i32* %arrayidx3, align 4, !tbaa !0 + +; CHECK: @foo +; CHECK-NOT: lwzx +; CHECK: lwz {{[0-9]+}}, 4([[REG:[0-9]+]]) +; CHECK: lwz {{[0-9]+}}, 0([[REG]]) +; CHECK: blr + + %add = add nsw i32 %4, %3 + call void @llvm.lifetime.end(i64 32800, i8* %2) #0 + call void @llvm.lifetime.end(i64 32800, i8* %1) #0 + call void @llvm.lifetime.end(i64 32800, i8* %0) #0 + ret i32 %add +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +declare void @bar(i32*, i32*, i32*) + +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +attributes #0 = { nounwind } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +