1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass statistics (reported with -stats): counts of multi-register transfer
// instructions formed and of ldrd/strd instructions rewritten by this pass.
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register-allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached for use by the merging helpers below.
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - One candidate load/store being considered for merging:
// its immediate offset, transferred register (with kill flag), ordinal
// position within the block, and an iterator to the instruction itself.
// Merged is set once the entry has been folded into a multiple op.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// Helper declarations; see the out-of-line definitions below for details.
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
128 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single-register load/store opcode plus
// an addressing submode (ia/ib/da/db) to the corresponding load/store
// multiple opcode. Returns 0 where no non-writeback form exists (the VFP
// db submodes only exist as base-updating _UPD instructions).
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 default: llvm_unreachable("Unhandled opcode!");
137 default: llvm_unreachable("Unhandled submode!");
138 case ARM_AM::ia: return ARM::LDMIA;
139 case ARM_AM::da: return ARM::LDMDA;
140 case ARM_AM::db: return ARM::LDMDB;
141 case ARM_AM::ib: return ARM::LDMIB;
147 default: llvm_unreachable("Unhandled submode!");
148 case ARM_AM::ia: return ARM::STMIA;
149 case ARM_AM::da: return ARM::STMDA;
150 case ARM_AM::db: return ARM::STMDB;
151 case ARM_AM::ib: return ARM::STMIB;
// Thumb2 only supports the ia and db submodes.
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::t2LDMIA;
160 case ARM_AM::db: return ARM::t2LDMDB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2STMIA;
169 case ARM_AM::db: return ARM::t2STMDB;
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::VSTMSIA;
185 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::VLDMDIA;
193 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
199 default: llvm_unreachable("Unhandled submode!");
200 case ARM_AM::ia: return ARM::VSTMDIA;
201 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// getLoadStoreMultipleSubMode - Recover the addressing submode encoded in a
// load/store-multiple opcode (both plain and _UPD/_RET variants). Falls back
// to bad_am_submode only if control reaches past the switch.
212 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214 default: llvm_unreachable("Unhandled opcode!");
220 case ARM::t2LDMIA_RET:
222 case ARM::t2LDMIA_UPD:
224 case ARM::t2STMIA_UPD:
226 case ARM::VLDMSIA_UPD:
228 case ARM::VSTMSIA_UPD:
230 case ARM::VLDMDIA_UPD:
232 case ARM::VSTMDIA_UPD:
246 case ARM::t2LDMDB_UPD:
248 case ARM::t2STMDB_UPD:
249 case ARM::VLDMSDB_UPD:
250 case ARM::VSTMSDB_UPD:
251 case ARM::VLDMDDB_UPD:
252 case ARM::VSTMDDB_UPD:
262 return ARM_AM::bad_am_submode;
265 } // end namespace ARM_AM
266 } // end namespace llvm
// isT2i32Load - True for the Thumb2 32-bit integer immediate-offset loads
// handled by this pass.
268 static bool isT2i32Load(unsigned Opc) {
269 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True for any 32-bit integer load (ARM LDRi12 or Thumb2) that
// this pass can merge.
272 static bool isi32Load(unsigned Opc) {
273 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
// isT2i32Store - True for the Thumb2 32-bit integer immediate-offset stores
// handled by this pass.
276 static bool isT2i32Store(unsigned Opc) {
277 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True for any 32-bit integer store (ARM STRi12 or Thumb2) that
// this pass can merge.
280 static bool isi32Store(unsigned Opc) {
281 return Opc == ARM::STRi12 || isT2i32Store(Opc);
284 /// MergeOps - Create and insert a LDM or STM with Base as base register and
285 /// registers in Regs as the register operands that would be loaded / stored.
286 /// It returns true if the transformation is done.
288 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
289 MachineBasicBlock::iterator MBBI,
290 int Offset, unsigned Base, bool BaseKill,
291 int Opcode, ARMCC::CondCodes Pred,
292 unsigned PredReg, unsigned Scratch, DebugLoc dl,
293 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
294 // Only a single register to load / store. Don't bother.
295 unsigned NumRegs = Regs.size();
// Pick the addressing submode implied by the starting offset: ia needs
// Offset 0, ib needs +4, da needs -4*(n-1), db needs -4*n.
299 ARM_AM::AMSubMode Mode = ARM_AM::ia;
300 // VFP and Thumb2 do not support IB or DA modes.
301 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
302 bool haveIBAndDA = isNotVFP && !isThumb2;
303 if (Offset == 4 && haveIBAndDA)
305 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307 else if (Offset == -4 * (int)NumRegs && isNotVFP)
308 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 else if (Offset != 0) {
311 // Check if this is a supported opcode before we insert instructions to
312 // calculate a new base register.
313 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
315 // If starting offset isn't zero, insert a MI to materialize a new base.
316 // But only do so if it is cost effective, i.e. merging more than two
322 if (isi32Load(Opcode))
323 // If it is a load, then just use one of the destination register to
324 // use as the new base.
325 NewBase = Regs[NumRegs-1].first;
327 // Use the scratch register to use as a new base.
// Compute NewBase = Base +/- Offset with an ADD/SUB (SP-relative variants
// for Thumb2 when the base is SP).
332 int BaseOpc = !isThumb2
334 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
338 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as a (T2) shifter-operand immediate.
341 int ImmedOffset = isThumb2
342 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
343 if (ImmedOffset == -1)
344 // FIXME: Try t2ADDri12 or t2SUBri12?
345 return false; // Probably not worth it then.
347 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
348 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
349 .addImm(Pred).addReg(PredReg).addReg(0);
351 BaseKill = true; // New base is always killed right after its use.
// Loads define their transfer registers; stores read (and may kill) them.
354 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
355 Opcode == ARM::VLDRD);
356 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
357 if (!Opcode) return false;
358 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
359 .addReg(Base, getKillRegState(BaseKill))
360 .addImm(Pred).addReg(PredReg);
361 for (unsigned i = 0; i != NumRegs; ++i)
362 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
363 | getKillRegState(Regs[i].second));
368 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
370 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
372 unsigned memOpsBegin, unsigned memOpsEnd,
373 unsigned insertAfter, int Offset,
374 unsigned Base, bool BaseKill,
376 ARMCC::CondCodes Pred, unsigned PredReg,
379 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
380 // First calculate which of the registers should be killed by the merged
// instruction. Track registers killed by memops that sit before the
// insertion point (Killer records which memop carries each kill flag).
382 const unsigned insertPos = memOps[insertAfter].Position;
383 SmallSet<unsigned, 4> KilledRegs;
384 DenseMap<unsigned, unsigned> Killer;
385 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
386 if (i == memOpsBegin) {
391 if (memOps[i].Position < insertPos && memOps[i].isKill) {
392 unsigned Reg = memOps[i].Reg;
393 KilledRegs.insert(Reg);
// Build the (register, isKill) list for the merged instruction. A kill
// flag is transferred if the memop itself kills the register or a prior
// memop before the insertion point did.
398 SmallVector<std::pair<unsigned, bool>, 8> Regs;
399 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
400 unsigned Reg = memOps[i].Reg;
401 // If we are inserting the merged operation after an operation that
402 // uses the same register, make sure to transfer any kill flag.
403 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
404 Regs.push_back(std::make_pair(Reg, isKill));
407 // Try to do the merge.
408 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
410 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
411 Pred, PredReg, Scratch, dl, Regs))
414 // Merge succeeded, update records.
415 Merges.push_back(prior(Loc));
416 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
417 // Remove kill flags from any memops that come before insertPos.
418 if (Regs[i-memOpsBegin].second) {
419 unsigned Reg = Regs[i-memOpsBegin].first;
420 if (KilledRegs.count(Reg)) {
421 unsigned j = Killer[Reg];
422 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
423 assert(Idx >= 0 && "Cannot find killing operand");
424 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
425 memOps[j].isKill = false;
427 memOps[i].isKill = true;
// Erase the original single load/store now that it is covered by the
// merged instruction.
429 MBB.erase(memOps[i].MBBI);
430 // Update this memop to refer to the merged instruction.
431 // We may need to move kill flags again.
432 memOps[i].Merged = true;
433 memOps[i].MBBI = Merges.back();
434 memOps[i].Position = insertPos;
438 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
439 /// load / store multiple instructions.
441 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
442 unsigned Base, int Opcode, unsigned Size,
443 ARMCC::CondCodes Pred, unsigned PredReg,
444 unsigned Scratch, MemOpQueue &MemOps,
445 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
446 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
447 int Offset = MemOps[SIndex].Offset;
448 int SOffset = Offset;
449 unsigned insertAfter = SIndex;
450 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
451 DebugLoc dl = Loc->getDebugLoc();
// Undef transfer registers compare as UINT_MAX so they never satisfy the
// ascending-register-number requirement below.
452 const MachineOperand &PMO = Loc->getOperand(0);
453 unsigned PReg = PMO.getReg();
454 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
455 : getARMRegisterNumbering(PReg);
// Greedily extend the run of mergeable memops starting at SIndex.
458 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
459 int NewOffset = MemOps[i].Offset;
460 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
461 unsigned Reg = MO.getReg();
462 unsigned RegNum = MO.isUndef() ? UINT_MAX
463 : getARMRegisterNumbering(Reg);
464 // Register numbers must be in ascending order. For VFP, the registers
465 // must also be consecutive and there is a limit of 16 double-word
466 // registers per instruction.
467 if (Reg != ARM::SP &&
468 NewOffset == Offset + (int)Size &&
469 ((isNotVFP && RegNum > PRegNum)
470 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
475 // Can't merge this in. Try merge the earlier ones first.
476 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
477 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to handle the remainder of the queue starting at the
// instruction that broke the run.
478 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
483 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Entire remaining run merges; the base is killed if the anchor
// instruction carried a kill of it.
487 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
488 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
489 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - Return true if MI is "Base = Base - Bytes" under the
// same predicate: a SUB-immediate of Base into Base whose scaled immediate
// equals Bytes. Limit bounds the acceptable offset (0 means no limit).
493 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
494 unsigned Bytes, unsigned Limit,
495 ARMCC::CondCodes Pred, unsigned PredReg){
496 unsigned MyPredReg = 0;
499 if (MI->getOpcode() != ARM::t2SUBri &&
500 MI->getOpcode() != ARM::t2SUBrSPi &&
501 MI->getOpcode() != ARM::t2SUBrSPi12 &&
502 MI->getOpcode() != ARM::tSUBspi &&
503 MI->getOpcode() != ARM::SUBri)
506 // Reject a zero decrement or one at/above the caller-supplied Limit.
507 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, so scale by 4 before comparing.
510 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
511 return (MI->getOperand(0).getReg() == Base &&
512 MI->getOperand(1).getReg() == Base &&
513 (MI->getOperand(2).getImm()*Scale) == Bytes &&
514 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
515 MyPredReg == PredReg);
// isMatchingIncrement - Return true if MI is "Base = Base + Bytes" under the
// same predicate: an ADD-immediate of Base into Base whose scaled immediate
// equals Bytes. Limit bounds the acceptable offset (0 means no limit).
518 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
519 unsigned Bytes, unsigned Limit,
520 ARMCC::CondCodes Pred, unsigned PredReg){
521 unsigned MyPredReg = 0;
524 if (MI->getOpcode() != ARM::t2ADDri &&
525 MI->getOpcode() != ARM::t2ADDrSPi &&
526 MI->getOpcode() != ARM::t2ADDrSPi12 &&
527 MI->getOpcode() != ARM::tADDspi &&
528 MI->getOpcode() != ARM::ADDri)
531 if (Bytes == 0 || (Limit && Bytes >= Limit))
532 // Reject a zero increment or one at/above the caller-supplied Limit.
// tADDspi encodes its immediate in words, so scale by 4 before comparing.
535 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
536 return (MI->getOperand(0).getReg() == Base &&
537 MI->getOperand(1).getReg() == Base &&
538 (MI->getOperand(2).getImm()*Scale) == Bytes &&
539 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
540 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes an instruction transfers. For
// multiple ops this is derived from the length of the register list (total
// operands minus the fixed operands, scaled by the per-register size:
// 4 bytes for integer/single-precision, 8 for the double-precision cases).
543 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
544 switch (MI->getOpcode()) {
572 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
575 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a load/store-multiple opcode plus an
// addressing submode to its base-updating (writeback, _UPD) counterpart.
579 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
580 ARM_AM::AMSubMode Mode) {
582 default: llvm_unreachable("Unhandled opcode!");
588 default: llvm_unreachable("Unhandled submode!");
589 case ARM_AM::ia: return ARM::LDMIA_UPD;
590 case ARM_AM::ib: return ARM::LDMIB_UPD;
591 case ARM_AM::da: return ARM::LDMDA_UPD;
592 case ARM_AM::db: return ARM::LDMDB_UPD;
600 default: llvm_unreachable("Unhandled submode!");
601 case ARM_AM::ia: return ARM::STMIA_UPD;
602 case ARM_AM::ib: return ARM::STMIB_UPD;
603 case ARM_AM::da: return ARM::STMDA_UPD;
604 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 and VFP variants only support the ia and db submodes.
610 default: llvm_unreachable("Unhandled submode!");
611 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
612 case ARM_AM::db: return ARM::t2LDMDB_UPD;
618 default: llvm_unreachable("Unhandled submode!");
619 case ARM_AM::ia: return ARM::t2STMIA_UPD;
620 case ARM_AM::db: return ARM::t2STMDB_UPD;
625 default: llvm_unreachable("Unhandled submode!");
626 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
627 case ARM_AM::db: return ARM::VLDMSDB_UPD;
632 default: llvm_unreachable("Unhandled submode!");
633 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
634 case ARM_AM::db: return ARM::VLDMDDB_UPD;
639 default: llvm_unreachable("Unhandled submode!");
640 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
641 case ARM_AM::db: return ARM::VSTMSDB_UPD;
646 default: llvm_unreachable("Unhandled submode!");
647 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
648 case ARM_AM::db: return ARM::VSTMDDB_UPD;
656 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
657 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
659 /// stmia rn, <ra, rb, rc>
660 /// rn := rn + 4 * 3;
662 /// stmia rn!, <ra, rb, rc>
664 /// rn := rn - 4 * 3;
665 /// ldmia rn, <ra, rb, rc>
667 /// ldmdb rn!, <ra, rb, rc>
668 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
669 MachineBasicBlock::iterator MBBI,
671 MachineBasicBlock::iterator &I) {
672 MachineInstr *MI = MBBI;
673 unsigned Base = MI->getOperand(0).getReg();
674 bool BaseKill = MI->getOperand(0).isKill();
675 unsigned Bytes = getLSMultipleTransferSize(MI);
676 unsigned PredReg = 0;
677 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
678 int Opcode = MI->getOpcode();
679 DebugLoc dl = MI->getDebugLoc();
681 // Can't use an updating ld/st if the base register is also a dest
682 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
683 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
684 if (MI->getOperand(i).getReg() == Base)
687 bool DoMerge = false;
688 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
690 // Try merging with the previous instruction.
691 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
692 if (MBBI != BeginMBBI) {
693 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
694 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
// A decrement before an ia op turns it into db; before an ib op, into ia
// (the submode is adjusted in the elided assignments).
696 if (Mode == ARM_AM::ia &&
697 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
700 } else if (Mode == ARM_AM::ib &&
701 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
709 // Try merging with the next instruction.
710 MachineBasicBlock::iterator EndMBBI = MBB.end();
711 if (!DoMerge && MBBI != EndMBBI) {
712 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
713 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// A following add matches increment-style modes; a following sub matches
// decrement-style modes. The update direction agrees with the submode.
715 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
716 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
718 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
719 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Rewrite as the _UPD form: writeback base def first, then the original
// base/predicate/register-list operands.
734 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
735 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
736 .addReg(Base, getDefRegState(true)) // WB base register
737 .addReg(Base, getKillRegState(BaseKill))
738 .addImm(Pred).addReg(PredReg);
740 // Transfer the rest of operands.
741 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
742 MIB.addOperand(MI->getOperand(OpNum));
744 // Transfer memoperands.
745 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (writeback-before-access) form. VFP loads/stores have no
// pre-indexed variant, so they map to single-register VLDM/VSTM _UPD ops.
751 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
752 ARM_AM::AddrOpc Mode) {
759 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
761 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
763 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
765 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
768 return ARM::t2LDR_PRE;
771 return ARM::t2STR_PRE;
772 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (writeback-after-access) form. VFP loads/stores again fall
// back to the single-register VLDM/VSTM _UPD ops.
777 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
778 ARM_AM::AddrOpc Mode) {
781 return ARM::LDR_POST;
783 return ARM::STR_POST;
785 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
787 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
789 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
791 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
794 return ARM::t2LDR_POST;
797 return ARM::t2STR_POST;
798 default: llvm_unreachable("Unhandled opcode!");
803 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
804 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
805 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
806 MachineBasicBlock::iterator MBBI,
807 const TargetInstrInfo *TII,
809 MachineBasicBlock::iterator &I) {
810 MachineInstr *MI = MBBI;
811 unsigned Base = MI->getOperand(1).getReg();
812 bool BaseKill = MI->getOperand(1).isKill();
813 unsigned Bytes = getLSMultipleTransferSize(MI);
814 int Opcode = MI->getOpcode();
815 DebugLoc dl = MI->getDebugLoc();
// isAM5: VFP addressing; isAM2: ARM-mode integer LDR/STR addressing.
816 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
817 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
818 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only fold when the existing immediate offset is zero; otherwise the
// folded writeback amount would not equal the access address.
819 if (isi32Load(Opcode) || isi32Store(Opcode))
820 if (MI->getOperand(2).getImm() != 0)
822 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
825 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
826 // Can't do the merge if the destination register is the same as the would-be
827 // writeback register.
828 if (isLd && MI->getOperand(0).getReg() == Base)
831 unsigned PredReg = 0;
832 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
833 bool DoMerge = false;
834 ARM_AM::AddrOpc AddSub = ARM_AM::add;
836 // AM2 - 12 bits, thumb2 - 8 bits.
837 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
839 // Try merging with the previous instruction.
840 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
841 if (MBBI != BeginMBBI) {
842 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
843 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
845 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
847 AddSub = ARM_AM::sub;
849 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A preceding update folds into the pre-indexed form.
853 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
858 // Try merging with the next instruction.
859 MachineBasicBlock::iterator EndMBBI = MBB.end();
860 if (!DoMerge && MBBI != EndMBBI) {
861 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
862 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
865 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
867 AddSub = ARM_AM::sub;
868 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A following update folds into the post-indexed form.
872 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Encode the writeback amount: AM2 packs direction+amount, Thumb2 uses a
// signed immediate.
886 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
888 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
891 // VLDM[SD}_UPD, VSTM[SD]_UPD
892 // (There are no base-updating versions of VLDR/VSTR instructions, but the
893 // updating load/store-multiple instructions can be used with only one
895 MachineOperand &MO = MI->getOperand(0);
896 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
897 .addReg(Base, getDefRegState(true)) // WB base register
898 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
899 .addImm(Pred).addReg(PredReg)
900 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
901 getKillRegState(MO.isKill())));
904 // LDR_PRE, LDR_POST,
905 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
906 .addReg(Base, RegState::Define)
907 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
909 // t2LDR_PRE, t2LDR_POST
910 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
911 .addReg(Base, RegState::Define)
912 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
914 MachineOperand &MO = MI->getOperand(0);
// STR_PRE, STR_POST (ARM mode): writeback base is the explicit def.
917 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
918 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
919 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
921 // t2STR_PRE, t2STR_POST
922 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
923 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
924 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
931 /// isMemoryOp - Returns true if instruction is a memory operations (that this
932 /// pass is capable of operating on).
933 static bool isMemoryOp(const MachineInstr *MI) {
934 // When no memory operands are present, conservatively assume unaligned,
935 // volatile, unfoldable.
936 if (!MI->hasOneMemOperand())
939 const MachineMemOperand *MMO = *MI->memoperands_begin();
941 // Don't touch volatile memory accesses - we may be changing their order.
942 if (MMO->isVolatile())
945 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not, so require at least word alignment before merging.
947 if (MMO->getAlignment() < 4)
950 // str <undef> could probably be eliminated entirely, but for now we just want
951 // to avoid making a mess of it.
952 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
953 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
954 MI->getOperand(0).isUndef())
957 // Likewise don't mess with references to undefined addresses.
958 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
959 MI->getOperand(1).isUndef())
// Only immediate-offset register-base forms are handled; reject
// register-offset addressing.
962 int Opcode = MI->getOpcode();
967 return MI->getOperand(1).isReg();
970 return MI->getOperand(1).isReg();
977 return MI->getOperand(1).isReg();
982 /// AdvanceRS - Advance register scavenger to just before the earliest memory
983 /// op that is being merged.
984 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the memop with the smallest Position (program order), since memops
// are queued in offset order, not program order.
985 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
986 unsigned Position = MemOps[0].Position;
987 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
988 if (MemOps[i].Position < Position) {
989 Position = MemOps[i].Position;
990 Loc = MemOps[i].MBBI;
994 if (Loc != MBB.begin())
995 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode the signed byte offset of a load/store's
// immediate operand, unscaling/sign-adjusting per addressing mode (plain
// immediates for the i12/i8 forms, AM3 for LDRD/STRD, AM5 scaled-by-4 for
// the VFP forms).
998 static int getMemoryOpOffset(const MachineInstr *MI) {
999 int Opcode = MI->getOpcode();
1000 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1001 unsigned NumOperands = MI->getDesc().getNumOperands();
1002 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// These opcodes carry the offset directly as a plain immediate.
1004 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1005 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1006 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1007 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1010 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1011 : ARM_AM::getAM5Offset(OffField) * 4;
// Negate when the addressing mode encodes a subtract.
1013 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1016 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit one single-register load or store (used when an
// ldrd/strd is split in two). isDef selects load vs store; the Reg/Base/Off
// flag parameters carry the kill/undef state to place on the new operands.
1022 static void InsertLDR_STR(MachineBasicBlock &MBB,
1023 MachineBasicBlock::iterator &MBBI,
1024 int Offset, bool isDef,
1025 DebugLoc dl, unsigned NewOpc,
1026 unsigned Reg, bool RegDeadKill, bool RegUndef,
1027 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1028 bool OffKill, bool OffUndef,
1029 ARMCC::CondCodes Pred, unsigned PredReg,
1030 const TargetInstrInfo *TII, bool isT2) {
// Load form: Reg is a def (possibly dead).
1032 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1034 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1035 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1036 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is a use (possibly killed/undef).
1038 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1040 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1041 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1042 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an ldrd/strd whose register pair is not a
// valid even/odd pair: either into an ldm/stm (ascending regs, zero offset)
// or into two single loads/stores, taking care to preserve kill/undef flags
// and to avoid clobbering the base register early.
1046 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1047 MachineBasicBlock::iterator &MBBI) {
1048 MachineInstr *MI = &*MBBI;
1049 unsigned Opcode = MI->getOpcode();
1050 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1051 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1052 unsigned EvenReg = MI->getOperand(0).getReg();
1053 unsigned OddReg = MI->getOperand(1).getReg();
1054 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1055 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A proper even/odd consecutive pair is already valid; nothing to fix.
1056 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1059 MachineBasicBlock::iterator NewBBI = MBBI;
1060 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1061 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the pair regs are defs (dead flag matters); for stores they
// are uses (kill flag matters).
1062 bool EvenDeadKill = isLd ?
1063 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1064 bool EvenUndef = MI->getOperand(0).isUndef();
1065 bool OddDeadKill = isLd ?
1066 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1067 bool OddUndef = MI->getOperand(1).isUndef();
1068 const MachineOperand &BaseOp = MI->getOperand(2);
1069 unsigned BaseReg = BaseOp.getReg();
1070 bool BaseKill = BaseOp.isKill();
1071 bool BaseUndef = BaseOp.isUndef();
1072 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1073 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1074 int OffImm = getMemoryOpOffset(MI);
1075 unsigned PredReg = 0;
1076 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1078 if (OddRegNum > EvenRegNum && OffImm == 0) {
1079 // Ascending register numbers and no offset. It's safe to change it to a
// ldm/stm of the two registers.
1081 unsigned NewOpc = (isLd)
1082 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1083 : (isT2 ? ARM::t2STMIA : ARM::STMIA)
1154 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1155 /// ops of the same base and incrementing offset into LDM / STM ops.
// Scans MBB linearly, accumulating consecutive loads/stores that share the
// same opcode, base register, and predicate into a MemOps queue kept sorted
// by offset; each completed chain is handed to MergeLDR_STR and then to the
// base-update folding helpers. Returns true if any merge was performed.
// NOTE(review): a number of lines of this function are elided in this
// excerpt; comments below describe only the code that is visible.
1156 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1157 unsigned NumMerges = 0;
1158 unsigned NumMemOps = 0;
// State of the chain currently being accumulated (base/size/predicate).
1160 unsigned CurrBase = 0;
1162 unsigned CurrSize = 0;
1163 ARMCC::CondCodes CurrPred = ARMCC::AL;
1164 unsigned CurrPredReg = 0;
1165 unsigned Position = 0;
1166 SmallVector<MachineBasicBlock::iterator,4> Merges;
// Track liveness with the register scavenger so a scratch base register can
// be found later if the merge needs one.
1168 RS->enterBasicBlock(&MBB);
1169 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Rewrite illegal LDRD/STRD register pairs before attempting any merging.
1171 if (FixInvalidRegPairOp(MBB, MBBI))
1174 bool Advance = false;
1175 bool TryMerge = false;
1176 bool Clobber = false;
1178 bool isMemOp = isMemoryOp(MBBI);
// Decompose the candidate memory op: destination/source reg, base reg,
// predicate, and the immediate offset it accesses.
1180 int Opcode = MBBI->getOpcode();
1181 unsigned Size = getLSMultipleTransferSize(MBBI);
1182 const MachineOperand &MO = MBBI->getOperand(0);
1183 unsigned Reg = MO.getReg();
// A def (load result) carries no kill flag of its own here.
1184 bool isKill = MO.isDef() ? false : MO.isKill();
1185 unsigned Base = MBBI->getOperand(1).getReg();
1186 unsigned PredReg = 0;
1187 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1188 int Offset = getMemoryOpOffset(MBBI);
1191 // r5 := ldr [r5, #4]
1192 // r6 := ldr [r5, #8]
1194 // The second ldr has effectively broken the chain even though it
1195 // looks like the later ldr(s) use the same base register. Try to
1196 // merge the ldr's so far, including this one. But don't try to
1197 // combine the following ldr(s).
1198 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1199 if (CurrBase == 0 && !Clobber) {
1200 // Start of a new chain.
1205 CurrPredReg = PredReg;
1206 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode, base, and predicate: this op can extend the current chain.
1215 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1216 // No need to match PredReg.
1217 // Continue adding to the queue.
1218 if (Offset > MemOps.back().Offset) {
1219 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert in offset order, rejecting duplicate offsets.
1224 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1226 if (Offset < I->Offset) {
1227 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1232 } else if (Offset == I->Offset) {
1233 // Collision! This can't be merged!
// Debug values must not break or extend a chain.
1242 if (MBBI->isDebugValue()) {
1245 // Reach the end of the block, try merging the memory instructions.
1247 } else if (Advance) {
1251 // Reach the end of the block, try merging the memory instructions.
// A chain of two or more ops is a candidate for an LDM/STM.
1257 if (NumMemOps > 1) {
1258 // Try to find a free register to use as a new base in case it's needed.
1259 // First advance to the instruction just before the start of the chain.
1260 AdvanceRS(MBB, MemOps);
1261 // Find a scratch register.
1262 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1263 // Process the load / store instructions.
1264 RS->forward(prior(MBBI));
1268 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1269 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1271 // Try folding preceding/trailing base inc/dec into the generated
1273 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1274 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1276 NumMerges += Merges.size();
1278 // Try folding preceding/trailing base inc/dec into those load/store
1279 // that were not merged to form LDM/STM ops.
1280 for (unsigned i = 0; i != NumMemOps; ++i)
1281 if (!MemOps[i].Merged)
1282 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1285 // RS may be pointing to an instruction that's deleted.
1286 RS->skipTo(prior(MBBI));
1287 } else if (NumMemOps == 1) {
1288 // Try folding preceding/trailing base inc/dec into the single
1290 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1292 RS->forward(prior(MBBI));
// Reset chain state before scanning for the next chain.
1299 CurrPred = ARMCC::AL;
1306 // If iterator hasn't been advanced and this is not a memory op, skip it.
1307 // It can't start a new chain anyway.
1308 if (!Advance && !isMemOp && MBBI != E) {
1314 return NumMerges > 0;
1317 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1318 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1319 /// directly restore the value of LR into pc.
1320 /// ldmfd sp!, {..., lr}
1323 /// ldmfd sp!, {..., lr}
1326 /// ldmfd sp!, {..., pc}
// Returns true when the rewrite was performed (elided in this excerpt).
1327 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1328 if (MBB.empty()) return false;
// Look at the last real (non-debug) instruction: it must be a return.
1330 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1331 if (MBBI != MBB.begin() &&
1332 (MBBI->getOpcode() == ARM::BX_RET ||
1333 MBBI->getOpcode() == ARM::tBX_RET ||
1334 MBBI->getOpcode() == ARM::MOVPCLR)) {
// The instruction just before the return must be a writeback multiple-load.
1335 MachineInstr *PrevMI = prior(MBBI);
1336 unsigned Opcode = PrevMI->getOpcode();
1337 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1338 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1339 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register loaded must be LR for the fold to make sense.
1340 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1341 if (MO.getReg() != ARM::LR)
// Rewrite the LDM into the return form that loads straight into PC.
1343 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
// Only the increment-after forms are expected to reach this point.
1344 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1345 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1346 PrevMI->setDesc(TII->get(NewOpc));
// Preserve the return's implicit operands (e.g. register uses) on the LDM.
1348 PrevMI->copyImplicitOps(&*MBBI);
// Entry point of the post-RA load/store optimization pass: caches target
// hooks, then runs the LDM/STM merger (and, on v5T+, the return-into-LDM
// fold) over every basic block. Returns true if anything changed.
1356 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1357 const TargetMachine &TM = Fn.getTarget();
1358 AFI = Fn.getInfo<ARMFunctionInfo>();
1359 TII = TM.getInstrInfo();
1360 TRI = TM.getRegisterInfo();
// NOTE(review): heap-allocated here; the matching `delete RS` is not visible
// in this excerpt — confirm it exists at the end of the function.
1361 RS = new RegScavenger();
1362 isThumb2 = AFI->isThumb2Function();
1364 bool Modified = false;
1365 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1367 MachineBasicBlock &MBB = *MFI;
1368 Modified |= LoadStoreMultipleOpti(MBB);
// MergeReturnIntoLDM produces LDM-into-PC returns, valid only on v5T+.
1369 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1370 Modified |= MergeReturnIntoLDM(MBB);
1378 /// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
1379 /// loads / stores from consecutive locations closer together to make it more
1380 /// likely they will be combined later.
1383 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1385 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target/function state, initialized in runOnMachineFunction.
1387 const TargetData *TD;
1388 const TargetInstrInfo *TII;
1389 const TargetRegisterInfo *TRI;
1390 const ARMSubtarget *STI;
1391 MachineRegisterInfo *MRI;
1392 MachineFunction *MF;
1394 virtual bool runOnMachineFunction(MachineFunction &Fn);
1396 virtual const char *getPassName() const {
1397 return "ARM pre- register allocation load / store optimization pass";
// Checks whether two adjacent loads/stores can be fused into one
// LDRD/STRD, returning the new opcode and operands via out-params.
1401 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1402 unsigned &NewOpc, unsigned &EvenReg,
1403 unsigned &OddReg, unsigned &BaseReg,
1405 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves the queued ops for one base register next to each other.
1407 bool RescheduleOps(MachineBasicBlock *MBB,
1408 SmallVector<MachineInstr*, 4> &Ops,
1409 unsigned Base, bool isLd,
1410 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1411 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification token required by MachineFunctionPass.
1413 char ARMPreAllocLoadStoreOpt::ID = 0;
// Entry point of the pre-RA pass: caches target hooks, then reschedules
// load/store instructions in every basic block of the function.
1416 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1417 TD = Fn.getTarget().getTargetData();
1418 TII = Fn.getTarget().getInstrInfo();
1419 TRI = Fn.getTarget().getRegisterInfo();
1420 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1421 MRI = &Fn.getRegInfo();
1424 bool Modified = false;
1425 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1427 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps can be
// moved together across the instructions in [I, E): rejects intervening
// calls, terminators, side-effecting instructions, and (for loads) stores;
// also estimates the register-pressure cost of extending live ranges.
1432 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1433 MachineBasicBlock::iterator I,
1434 MachineBasicBlock::iterator E,
1435 SmallPtrSet<MachineInstr*, 4> &MemOps,
1436 SmallSet<unsigned, 4> &MemRegs,
1437 const TargetRegisterInfo *TRI) {
1438 // Are there stores / loads / calls between them?
1439 // FIXME: This is overly conservative. We should make use of alias information
1441 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the ops being moved themselves.
1443 if (I->isDebugValue() || MemOps.count(&*I))
1445 const TargetInstrDesc &TID = I->getDesc();
1446 if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
// Moving a load across a store could change the loaded value.
1448 if (isLd && TID.mayStore())
1453 // It's not safe to move the first 'str' down.
1456 // str r4, [r0, #+4]
// Inspect operands: a redefinition of the base kills the transformation;
// other registers defined/used here add to estimated pressure.
1460 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1461 MachineOperand &MO = I->getOperand(j);
1464 unsigned Reg = MO.getReg();
1465 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1467 if (Reg != Base && !MemRegs.count(Reg))
1468 AddedRegPressure.insert(Reg);
1472 // Estimate register pressure increase due to the transformation.
1473 if (MemRegs.size() <= 4)
1474 // Ok if we are moving small number of instructions.
// Heuristic threshold: tolerate up to 2x the registers already involved.
1476 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Check whether Op0/Op1 (two i32 loads or stores off the
// same base) can be combined into a single LDRD/STRD. On success fills in
// the new opcode, the even/odd destination regs, base reg, encoded offset,
// predicate, and debug location via the reference out-params.
1480 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1482 unsigned &NewOpc, unsigned &EvenReg,
1483 unsigned &OddReg, unsigned &BaseReg,
1484 int &Offset, unsigned &PredReg,
1485 ARMCC::CondCodes &Pred,
1487 // Make sure we're allowed to generate LDRD/STRD.
1488 if (!STI->hasV5TEOps())
1491 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode to its doubleword counterpart.
1493 unsigned Opcode = Op0->getOpcode();
1494 if (Opcode == ARM::LDRi12)
1496 else if (Opcode == ARM::STRi12)
1498 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1499 NewOpc = ARM::t2LDRDi8;
1502 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1503 NewOpc = ARM::t2STRDi8;
1509 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Require a single, non-volatile memory operand with a known value so the
// alignment of the access can be trusted.
1510 if (!Op0->hasOneMemOperand() ||
1511 !(*Op0->memoperands_begin())->getValue() ||
1512 (*Op0->memoperands_begin())->isVolatile())
1515 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1516 const Function *Func = MF->getFunction();
1517 unsigned ReqAlign = STI->hasV6Ops()
1518 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1519 : 8; // Pre-v6 need 8-byte align
1520 if (Align < ReqAlign)
1523 // Then make sure the immediate offset fits.
1524 int OffImm = getMemoryOpOffset(Op0);
1526 int Limit = (1 << 8) * Scale;
1527 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM-mode LDRD/STRD uses an addrmode3 add/sub encoding of the offset.
1531 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1533 AddSub = ARM_AM::sub;
1536 int Limit = (1 << 8) * Scale;
1537 if (OffImm >= Limit || (OffImm & (Scale-1)))
1539 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two transfer registers must be distinct to form a valid pair.
1541 EvenReg = Op0->getOperand(0).getReg();
1542 OddReg = Op1->getOperand(0).getReg();
1543 if (EvenReg == OddReg)
1545 BaseReg = Op0->getOperand(1).getReg();
1546 Pred = llvm::getInstrPredicate(Op0, PredReg);
1547 dl = Op0->getDebugLoc();
// Comparator ordering memory ops by DECREASING memory offset; used to sort a
// same-base op list so RescheduleOps can scan it back-to-front in
// increasing-offset order.
1552 struct OffsetCompare {
1553 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1554 int LOffset = getMemoryOpOffset(LHS);
1555 int ROffset = getMemoryOpOffset(RHS);
// Distinct instructions in the list never share an offset (duplicates are
// filtered out before the sort).
1556 assert(LHS == RHS || LOffset != ROffset);
1557 return LOffset > ROffset;
// RescheduleOps - Given all loads (or all stores, per isLd) off the same
// base register in Ops, find runs with consecutive offsets and matching
// opcodes and move them next to each other so later passes can merge them;
// a run of exactly two may be fused into an LDRD/STRD here directly.
// MI2LocMap gives each instruction's position in the block.
// NOTE(review): several lines are elided in this excerpt; comments describe
// only the visible code.
1562 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1563 SmallVector<MachineInstr*, 4> &Ops,
1564 unsigned Base, bool isLd,
1565 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1566 bool RetVal = false;
1568 // Sort by offset (in reverse order).
1569 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1571 // The loads / stores of the same base are in order. Scan them from first to
1572 // last and check for the following:
1573 // 1. Any def of base.
// Repeatedly peel off a mergeable run from the tail of Ops.
1575 while (Ops.size() > 1) {
1576 unsigned FirstLoc = ~0U;
1577 unsigned LastLoc = 0;
1578 MachineInstr *FirstOp = 0;
1579 MachineInstr *LastOp = 0;
1581 unsigned LastOpcode = 0;
1582 unsigned LastBytes = 0;
1583 unsigned NumMove = 0;
// Walk back-to-front, i.e. in increasing-offset order, growing the run.
1584 for (int i = Ops.size() - 1; i >= 0; --i) {
1585 MachineInstr *Op = Ops[i];
1586 unsigned Loc = MI2LocMap[Op];
// Track the earliest and latest block positions spanned by the run.
1587 if (Loc <= FirstLoc) {
1591 if (Loc >= LastLoc) {
// The run must be homogeneous: same opcode throughout.
1596 unsigned Opcode = Op->getOpcode();
1597 if (LastOpcode && Opcode != LastOpcode)
// And its offsets must be exactly contiguous (prev offset + size).
1600 int Offset = getMemoryOpOffset(Op);
1601 unsigned Bytes = getLSMultipleTransferSize(Op);
1603 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1606 LastOffset = Offset;
1608 LastOpcode = Opcode;
1609 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and transfer registers for the safety
// and register-pressure check below.
1616 SmallPtrSet<MachineInstr*, 4> MemOps;
1617 SmallSet<unsigned, 4> MemRegs;
1618 for (int i = NumMove-1; i >= 0; --i) {
1619 MemOps.insert(Ops[i]);
1620 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1623 // Be conservative, if the instructions are too far apart, don't
1624 // move them. We want to limit the increase of register pressure.
1625 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1627 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1628 MemOps, MemRegs, TRI);
1630 for (unsigned i = 0; i != NumMove; ++i)
1633 // This is the new location for the loads / stores.
// Loads gather at the first op's slot, stores at the last op's slot.
1634 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1635 while (InsertPos != MBB->end()
1636 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1639 // If we are moving a pair of loads / stores, see if it makes sense
1640 // to try to allocate a pair of registers that can form register pairs.
1641 MachineInstr *Op0 = Ops.back();
1642 MachineInstr *Op1 = Ops[Ops.size()-2];
1643 unsigned EvenReg = 0, OddReg = 0;
1644 unsigned BaseReg = 0, PredReg = 0;
1645 ARMCC::CondCodes Pred = ARMCC::AL;
1647 unsigned NewOpc = 0;
1650 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1651 EvenReg, OddReg, BaseReg,
1652 Offset, PredReg, Pred, isT2)) {
1656 // Form the pair instruction.
1658 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1659 dl, TII->get(NewOpc))
1660 .addReg(EvenReg, RegState::Define)
1661 .addReg(OddReg, RegState::Define)
1663 // FIXME: We're converting from LDRi12 to an insn that still
1664 // uses addrmode2, so we need an explicit offset reg. It should
1665 // always be reg0 since we're transforming LDRi12s.
1668 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store case: same construction, but the pair registers are uses.
1671 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1672 dl, TII->get(NewOpc))
1676 // FIXME: We're converting from LDRi12 to an insn that still
1677 // uses addrmode2, so we need an explicit offset reg. It should
1678 // always be reg0 since we're transforming STRi12s.
1681 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1687 // Add register allocation hints to form register pairs.
1688 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1689 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// No pairing: just splice each op of the run to the insertion point.
1691 for (unsigned i = 0; i != NumMove; ++i) {
1692 MachineInstr *Op = Ops.back();
1694 MBB->splice(InsertPos, MBB, Op);
1698 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Top-level driver for the pre-RA pass on one
// block: numbers each instruction, buckets unpredicated loads and stores by
// base register (stopping a region at calls/terminators or when the same
// base+offset is seen twice), then calls RescheduleOps per base register.
// NOTE(review): some lines are elided in this excerpt; comments describe
// only the visible code.
1708 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1709 bool RetVal = false;
// Per-region bookkeeping: instruction position map and per-base op lists.
1711 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1712 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1713 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1714 SmallVector<unsigned, 4> LdBases;
1715 SmallVector<unsigned, 4> StBases;
1718 MachineBasicBlock::iterator MBBI = MBB->begin();
1719 MachineBasicBlock::iterator E = MBB->end();
1721 for (; MBBI != E; ++MBBI) {
1722 MachineInstr *MI = MBBI;
1723 const TargetInstrDesc &TID = MI->getDesc();
1724 if (TID.isCall() || TID.isTerminator()) {
1725 // Stop at barriers.
// Assign a monotonically increasing location to every real instruction.
1730 if (!MI->isDebugValue())
1731 MI2LocMap[MI] = ++Loc;
1733 if (!isMemoryOp(MI))
// Only unpredicated memory ops are candidates.
1735 unsigned PredReg = 0;
1736 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1739 int Opc = MI->getOpcode();
1740 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1741 unsigned Base = MI->getOperand(1).getReg();
1742 int Offset = getMemoryOpOffset(MI);
1744 bool StopHere = false;
// Load path: append to this base's list, flagging duplicate offsets.
1746 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1747 Base2LdsMap.find(Base);
1748 if (BI != Base2LdsMap.end()) {
1749 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1750 if (Offset == getMemoryOpOffset(BI->second[i])) {
1756 BI->second.push_back(MI);
// First load seen for this base: start a new list.
1758 SmallVector<MachineInstr*, 4> MIs;
1760 Base2LdsMap[Base] = MIs;
1761 LdBases.push_back(Base);
// Store path: mirrors the load path above.
1764 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1765 Base2StsMap.find(Base);
1766 if (BI != Base2StsMap.end()) {
1767 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1768 if (Offset == getMemoryOpOffset(BI->second[i])) {
1774 BI->second.push_back(MI);
1776 SmallVector<MachineInstr*, 4> MIs;
1778 Base2StsMap[Base] = MIs;
1779 StBases.push_back(Base);
1784 // Found a duplicate (a base+offset combination that's seen earlier).
1791 // Re-schedule loads.
1792 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1793 unsigned Base = LdBases[i];
1794 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1796 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1799 // Re-schedule stores.
1800 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1801 unsigned Base = StBases[i];
1802 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1804 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-region maps before scanning the next region.
1808 Base2LdsMap.clear();
1809 Base2StsMap.clear();
1819 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1820 /// optimization pass.
1821 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1823 return new ARMPreAllocLoadStoreOpt();
1824 return new ARMLoadStoreOpt();