X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMLoadStoreOptimizer.cpp;h=c8ed5760f935de0c9dab586e65dc8648f04ea0f2;hb=6fc631978cdd1128a790854e497e267639d9325d;hp=5770f7739ae6d7d731e4bd7d9c61a58aac8acdc0;hpb=892143ff70b5e7d0cb06e7c4596c232347806b17;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5770f7739ae..c8ed5760f93 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -18,8 +18,12 @@ #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -27,19 +31,15 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); @@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, DebugLoc dl = Loc->getDebugLoc(); const MachineOperand &PMO = Loc->getOperand(0); unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(PReg); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; @@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, int NewOffset = MemOps[i].Offset; const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(Reg); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); // Register numbers must be in ascending order. For VFP / NEON load and // store multiples, the registers must also be consecutive and within the // limit on the number of registers per instruction. @@ -867,7 +865,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. - if (isLd && MI->getOperand(0).getReg() == Base) + if (MI->getOperand(0).getReg() == Base) return false; unsigned PredReg = 0; @@ -1160,6 +1158,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned NewOpc = (isLd) ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, + // so adjust and use t2LDRi12 here for that. + unsigned NewOpc2 = (isLd) + ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. @@ -1167,7 +1170,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, (BaseKill || OffKill) && (TRI->regsOverlap(EvenReg, BaseReg))) { assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); @@ -1185,7 +1188,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddDeadKill = true; } // Never kill the base register in the first instruction. - // if (EvenReg == BaseReg) EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, @@ -1193,7 +1195,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); @@ -1256,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // merge the ldr's so far, including this one. But don't try to // combine the following ldr(s). Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); + + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // + // The optimization may reorder the second ldr in front of the first + // ldr, which violates write after write(WAW) dependence. The same as + // str. Try to merge inst(s) already in MemOps. + bool Overlap = false; + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { + if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + if (CurrBase == 0 && !Clobber) { // Start of a new chain. CurrBase = Base; @@ -1266,7 +1284,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; Advance = true; - } else { + } else if (!Overlap) { if (Clobber) { TryMerge = true; Advance = true; @@ -1319,7 +1337,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass); + unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); // Process the load / store instructions. RS->forward(prior(MBBI)); @@ -1405,7 +1423,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(&*MBBI); + PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); MBB.erase(MBBI); return true; } @@ -1445,7 +1463,7 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} - const TargetData *TD; + const DataLayout *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; @@ -1475,7 +1493,7 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getTargetData(); + TD = Fn.getTarget().getDataLayout(); TII = Fn.getTarget().getInstrInfo(); TRI = Fn.getTarget().getRegisterInfo(); STI = &Fn.getTarget().getSubtarget(); @@ -1732,7 +1750,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); - const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); MRI->constrainRegClass(EvenReg, TRC); MRI->constrainRegClass(OddReg, TRC);