//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:   NumLDMGened++;  return ARM::LDM;
  case ARM::STR:   NumSTMGened++;  return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12: NumLDMGened++; return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12: NumSTMGened++; return ARM::t2STM;
  case ARM::VLDRS: NumVLDMGened++; return ARM::VLDMS;
  case ARM::VSTRS: NumVSTMGened++; return ARM::VSTMS;
  case ARM::VLDRD: NumVLDMGened++; return ARM::VLDMD;
  case ARM::VSTRD: NumVSTMGened++; return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
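/// For illustration, with hypothetical registers, a run of loads such as
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// would be emitted as the single instruction
///   ldmia r0, {r4, r5, r6}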
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new
    // base, but only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // The new base is always killed right after its use.
  }
  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }
    Regs.push_back(std::make_pair(Reg, isKill));
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
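/// For illustration (hypothetical registers): ldr's of r1, r2, r3 from
/// [r0], [r0, #4], [r0, #8] merge into one ldmia r0, {r1, r2, r3}, while an
/// out-of-order register (e.g. r1 loaded from [r0, #8]) splits the sequence,
/// since ldm / stm register lists must be in ascending register order.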
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
                Opcode == ARM::STM || Opcode == ARM::t2STM);

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  unsigned Offset = 0;

  if (isAM4) {
    // Can't use an updating ld/st if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()));
    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()));
    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
  }
  // Try merging with the previous instruction.
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isAM4) {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::db;
      } else if (isAM4 && Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::da;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        Mode = ARM_AM::db;
        DoMerge = true;
      }
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (isAM4) {
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill));
  if (isAM4) {
    // [t2]LDM_UPD, [t2]STM_UPD
    MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true))
      .addImm(Pred).addReg(PredReg);
  } else {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset))
      .addImm(Pred).addReg(PredReg);
  }
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
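/// For illustration, with hypothetical registers:
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4      (post-indexed)
///
///   sub r0, r0, #4
///   ldr r1, [r0]
/// =>
///   ldr r1, [r0, #-4]!    (pre-indexed)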
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  // Try merging with the previous instruction.
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
                               true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
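/// getMemoryOpOffset - Return the signed byte offset encoded in a load /
/// store's addressing mode operand; e.g. a hypothetical "ldr r1, [r0, #-8]"
/// yields -8.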
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
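/// FixInvalidRegPairOp - An ldrd / strd needs its two transfer registers to
/// form a consecutive even/odd pair (e.g. r0/r1). Rewrite other pairings
/// into an equivalent ldm / stm or into two single loads / stores.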
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // a ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
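      // e.g. when splitting a hypothetical "ldrd r0, r1, [r0]", the r1 half
      // must be loaded from [r0, #4] first so the base in r0 survives for
      // the second load.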
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
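/// For illustration, with hypothetical registers:
///   str r2, [r0]
///   str r3, [r0, #4]
///   str r4, [r0, #8]
/// =>
///   stmia r0, {r2, r3, r4}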
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }

  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
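/// For illustration, with hypothetical registers, hoisting the second load
/// past the unrelated add leaves a pair the post-RA pass can merge:
///   ldr r2, [r0]               ldr r2, [r0]
///   add r5, r5, #1      =>     ldr r3, [r0, #4]
///   ldr r3, [r0, #4]           add r5, r5, #1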
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if available.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;
  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        ++MBBI;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}