X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMLoadStoreOptimizer.cpp;h=bd40658638a6ebb86e4883b9fdfe03146ed29a7e;hb=d2f8ee71948854c205c74accf14c2b0b87502fef;hp=b7adfeeb5a1264f2047d93f53cf257c6f89442e5;hpb=9f85dccfc64b5f0b0c63ddfa0a42d8615aa1fcb3;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b7adfeeb5a1..bd40658638a 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -144,6 +144,46 @@ namespace { char ARMLoadStoreOpt::ID = 0; } +static bool definesCPSR(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. + return true; + } + + return false; +} + +static int getMemoryOpOffset(const MachineInstr *MI) { + int Opcode = MI->getOpcode(); + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; + unsigned NumOperands = MI->getDesc().getNumOperands(); + unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + + if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || + Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || + Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) + return OffField; + + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4; + ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) + : ARM_AM::getAM5Op(OffField); + + if (Op == ARM_AM::sub) + return -Offset; + + return Offset; +} + static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); @@ -335,40 +375,50 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); - - // Start updating any instructions with immediate offsets. Insert a sub before + // Start updating any instructions with immediate offsets. Insert a SUB before // the first non-updateable instruction (if any). for (; MBBI != MBB.end(); ++MBBI) { + bool InsertSub = false; + unsigned Opc = MBBI->getOpcode(); + if (MBBI->readsRegister(Base)) { - unsigned Opc = MBBI->getOpcode(); int Offset; - bool InsertSub = false; + bool IsLoad = + Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; + bool IsStore = + Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; - if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || - Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || - Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + if (IsLoad || IsStore) { // Loads and stores with immediate offsets can be updated, but only if // the new offset isn't negative. // The MachineOperand containing the offset immediate is the last one // before predicates. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); - // The offsets are scaled by 1, 2 or 4 depending on the Opcode + // The offsets are scaled by 1, 2 or 4 depending on the Opcode. Offset = MO.getImm() - WordOffset * getImmScale(Opc); - if (Offset >= 0) + + // If storing the base register, it needs to be reset first. + unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + + if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); else InsertSub = true; - } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { - // SUB/ADD using this register. Merge it with the update. - // If the merged offset is too large, insert a new sub instead. + } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && + !definesCPSR(MBBI)) { + // SUBS/ADDS using this register, with a dead def of the CPSR. + // Merge it with the update; if the merged offset is too large, + // insert a new sub instead. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); Offset = (Opc == ARM::tSUBi8) ? MO.getImm() + WordOffset * 4 : MO.getImm() - WordOffset * 4 ; - if (TL->isLegalAddImmediate(Offset)) { + if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { + // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if + // Offset == 0. MO.setImm(Offset); // The base register has now been reset, so exit early. return; @@ -381,13 +431,19 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } - if (InsertSub) { - // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) - .addImm(Pred).addReg(PredReg); - return; - } + } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { + // Since SUBS sets the condition flags, we can't place the base reset + // after an instruction that has a live CPSR def. + // The base register might also contain an argument for a function call. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; } if (MBBI->killsRegister(Base)) @@ -395,15 +451,18 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, return; } - // The end of the block was reached. This means register liveness escapes the - // block, and it's necessary to insert a sub before the last instruction. - if (MBB.succ_size() > 0) - // But only insert the SUB if there is actually a successor block. - // FIXME: Check more carefully if register is live at this point, e.g. by - // also examining the successor block's register liveness information. - AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + // End of block was reached. + if (MBB.succ_size() > 0) { + // FIXME: Because of a bug, live registers are sometimes missing from + // the successor blocks' live-in sets. This means we can't trust that + // information and *always* have to reset at the end of a block. + // See PR21029. + if (MBBI != MBB.end()) --MBBI; + AddDefaultT1CC( + BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) .addImm(Pred).addReg(PredReg); + } } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -422,6 +481,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 1) return false; + // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. + // Compute liveness information for that register to make the decision. + bool SafeToClobberCPSR = !isThumb1 || + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + MachineBasicBlock::LQR_Dead); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. + // It's also not possible to merge an STR of the base register in Thumb1. + if (isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + if (Opcode == ARM::tLDRi) { + Writeback = false; + break; + } else if (Opcode == ARM::tSTRi) { + return false; + } + } + ARM_AM::AMSubMode Mode = ARM_AM::ia; // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); @@ -445,6 +526,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 2) return false; + // On Thumb1, it's not worth materializing a new base register without + // clobbering the CPSR (i.e. not using ADDS/SUBS). + if (!SafeToClobberCPSR) + return false; + unsigned NewBase; if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to @@ -459,13 +545,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { + Offset = - Offset; BaseOpc = isThumb2 ? ARM::t2SUBri : + (isThumb1 && Offset < 8) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; - Offset = - Offset; } if (!TL->isLegalAddImmediate(Offset)) @@ -473,22 +561,37 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; // Probably not worth it then. if (isThumb1) { - if (Base != NewBase) { + // Thumb1: depending on immediate size, use either + // ADDS NewBase, Base, #imm3 + // or + // MOV NewBase, Base + // ADDS NewBase, #imm8. + if (Base != NewBase && Offset >= 8) { // Need to insert a MOV to the new base first. - // FIXME: If the immediate fits in 3 bits, use ADD instead. - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && + !STI->hasV6Ops()) { + // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr + if (Pred != ARMCC::AL) + return false; + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) + .addReg(Base, getKillRegState(BaseKill)); + } else + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + Base = NewBase; + BaseKill = false; } - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) - .addReg(NewBase, getKillRegState(true)).addImm(Offset) + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } - Base = NewBase; BaseKill = true; // New base is always killed straight away. } @@ -501,16 +604,16 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. - - // Exception: If the base register is in the input reglist, Thumb1 LDM is - // non-writeback. Check for this. - if (Opcode == ARM::tLDMIA && isThumb1) - for (unsigned I = 0; I < NumRegs; ++I) - if (Base == Regs[I].first) { - Writeback = false; - break; - } + // Check if a Thumb1 LDM/STM merge is safe. This is the case if: + // - There is no writeback (LDM of base register), + // - the base register is killed by the merged instruction, + // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS + // to reset the base register. + // Otherwise, don't merge. + // It's safe to return here since the code to materialize a new base register + // above is also conditional on SafeToClobberCPSR. + if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) + return false; MachineInstrBuilder MIB; @@ -525,11 +628,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB.addReg(Base, getDefRegState(true)) .addReg(Base, getKillRegState(BaseKill)); - // The base isn't dead after a merged instruction with writeback. Update - // future uses of the base with the added offset (if possible), or reset - // the base register as necessary. + // The base isn't dead after a merged instruction with writeback. + // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + } else { // No writeback, simply build the MachineInstr. MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); @@ -700,6 +803,11 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, memOps[i].MBBI = Merges.back(); memOps[i].Position = insertPos; } + + // Update memOps offsets, since they may have been modified by MergeOps. + for (auto &MemOp : memOps) { + MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); + } } /// MergeLDR_STR - Merge a number of load / store instructions into one or more @@ -721,7 +829,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; - + bool BaseKill = false; // vldm / vstm limit are 32 for S variants, 16 for D variants. switch (Opcode) { @@ -760,36 +868,28 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ++Count; } else { // Can't merge this in. Try merge the earlier ones first. - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, - Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); + // We need to compute BaseKill here because the MemOps may have been + // reordered. + BaseKill = Loc->killsRegister(Base); + + MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, + BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps, Merges); return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) + if (MemOps[i].Position > MemOps[insertAfter].Position) { insertAfter = i; + Loc = MemOps[i].MBBI; + } } - bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + BaseKill = Loc->killsRegister(Base); MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); } -static bool definesCPSR(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) - // If the instruction has live CPSR def, then it's not safe to fold it - // into load / store. - return true; - } - - return false; -} - static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg) { @@ -1327,34 +1427,6 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { RS->forward(std::prev(Loc)); } -static int getMemoryOpOffset(const MachineInstr *MI) { - int Opcode = MI->getOpcode(); - bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; - unsigned NumOperands = MI->getDesc().getNumOperands(); - unsigned OffField = MI->getOperand(NumOperands-3).getImm(); - - if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || - Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || - Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) - return OffField; - - // Thumb1 immediate offsets are scaled by 4 - if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) - return OffField * 4; - - int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) - : ARM_AM::getAM5Offset(OffField) * 4; - if (isAM3) { - if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else { - if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } - return Offset; -} - static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, @@ -1724,28 +1796,21 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - const TargetMachine &TM = Fn.getTarget(); - TL = TM.getSubtargetImpl()->getTargetLowering(); + STI = &static_cast(Fn.getSubtarget()); + TL = STI->getTargetLowering(); AFI = Fn.getInfo(); - TII = TM.getSubtargetImpl()->getInstrInfo(); - TRI = TM.getSubtargetImpl()->getRegisterInfo(); - STI = &TM.getSubtarget(); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - // FIXME: Temporarily disabling for Thumb-1 due to miscompiles - if (isThumb1) { - delete RS; - return false; - } - bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock &MBB = *MFI; Modified |= LoadStoreMultipleOpti(MBB); - if (TM.getSubtarget().hasV5TOps()) + if (STI->hasV5TOps()) Modified |= MergeReturnIntoLDM(MBB); } @@ -1793,10 +1858,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getSubtargetImpl()->getDataLayout(); - TII = Fn.getTarget().getSubtargetImpl()->getInstrInfo(); - TRI = Fn.getTarget().getSubtargetImpl()->getRegisterInfo(); - STI = &Fn.getTarget().getSubtarget(); + TD = Fn.getTarget().getDataLayout(); + STI = &static_cast(Fn.getSubtarget()); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); MF = &Fn; @@ -1811,7 +1876,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - SmallPtrSet &MemOps, + SmallPtrSetImpl &MemOps, SmallSet &MemRegs, const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them?