//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";
  }

  struct MemOpQueueEntry {
    int Offset;
    unsigned Position;
    MachineBasicBlock::iterator MBBI;
    bool Merged;
    MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
      : Offset(o), Position(p), MBBI(i), Merged(false) {}
  };
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  void MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &memOps,
                      unsigned memOpsBegin, unsigned memOpsEnd,
                      unsigned insertAfter, int Offset, unsigned Base,
                      bool BaseKill, int Opcode, ARMCC::CondCodes Pred,
                      unsigned PredReg, unsigned Scratch, DebugLoc dl,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
char ARMLoadStoreOpt::ID = 0;

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
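  // Sub-mode selection (descriptive note): in this chain a starting offset of
  // +4 corresponds to increment-before (ib), -4*(NumRegs-1) to decrement-after
  // (da), -4*NumRegs to decrement-before (db); an offset of 0 keeps the
  // increment-after (ia) default chosen above.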
  if (isAM4 && Offset == 4) {
    // Thumb2 does not support ldmib / stmib.
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    // Thumb2 does not support ldmda / stmda.
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert an MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    // Otherwise, use the scratch register as the new base.
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }
  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
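  // Append one register operand per transferred register below: loads define
  // the register, stores read it (and may carry a kill flag computed by the
  // caller).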
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &memOps,
               unsigned memOpsBegin, unsigned memOpsEnd, unsigned insertAfter,
               int Offset, unsigned Base, bool BaseKill, int Opcode,
               ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }

    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
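// The two helpers below recognize a plain SUB / ADD of a base register by a
// known number of bytes (with a matching predicate); these are the
// decrements / increments that can later be folded into a writeback form.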
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    return (MI->getNumOperands() - 5) * 4;
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
               Opcode == ARM::STM || Opcode == ARM::t2STM;

    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base);
        MI->getOperand(4).setIsDef();
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
  // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
  if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
  unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
      MI->getOperand(4).setReg(Base); // WB to base
      MI->getOperand(4).setIsDef();

  if (MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (Mode == ARM_AM::ia &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
      MI->getOperand(4).setReg(Base); // WB to base
      MI->getOperand(4).setIsDef();
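// Note: there is no pre/post-indexed form of the single-register VFP loads and
// stores, so the two helpers below map VLDRS/VLDRD and VSTRS/VSTRD to their
// multiple-register VLDM/VSTM counterparts instead of a _PRE/_POST opcode.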
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
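///
/// e.g. (illustrative example, added for clarity)
///   add r0, r0, #4
///   ldr r1, [r0]
/// =>
///   ldr r1, [r0, #4]!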
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
               Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getKillRegState(BaseKill))
      .addImm(Offset).addImm(Pred).addReg(PredReg)
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(MI->getOperand(0).getReg(), RegState::Define);
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);

      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

    MachineOperand &MO = MI->getOperand(0);

      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));

      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);

      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if instruction is a memory operation (one that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }
  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
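/// InsertLDR_STR - Insert a single LDR / STR with the given operands and
/// kill / undef flags. Used by FixInvalidRegPairOp below when an LDRD / STRD
/// has to be split into two single-word memory operations.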
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
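/// FixInvalidRegPairOp - An LDRD / STRD needs an even / odd register pair. If
/// register allocation did not produce such a pair, rewrite the instruction:
/// either as an LDM / STM (ascending register numbers and no offset) or as two
/// single loads / stores.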
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is probably
          // on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
      // Reached the end of the block; try merging the memory instructions.

      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrPred = ARMCC::AL;

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
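/// OffsetCompare - Sort memory ops of the same base register by decreasing
/// offset; RescheduleOps below then scans the sorted list from the back, i.e.
/// in increasing offset order.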
struct OffsetCompare {
  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  }
};
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }
  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  static char ID;
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

  const TargetData *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";
  }

  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &OffReg, int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
                        bool &isT2);
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
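/// IsSafeAndProfitableToMove - Return true if the memory ops in MemOps (all
/// using base register Base) can be moved next to each other: no intervening
/// instruction in [I, E) may clobber the base, store (for loads), or have
/// other side effects, and the estimated register pressure increase must stay
/// small relative to the registers already live in the group.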
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if possible.
  SmallSet<unsigned, 4> AddedRegPressure;
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;

    // It's not safe to move the first 'str' down.
    // str r4, [r0, #+4]
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
  else if (Opcode == ARM::STR)
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
    // Can't fall back to t2LDRi8 / t2STRi8.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
      AddSub = ARM_AM::sub;
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
      if (Loc >= LastLoc) {

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))

      LastOffset = Offset;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.

    SmallPtrSet<MachineInstr*, 4> MemOps;
    SmallSet<unsigned, 4> MemRegs;
    for (int i = NumMove-1; i >= 0; --i) {
      MemOps.insert(Ops[i]);
      MemRegs.insert(Ops[i]->getOperand(0).getReg());
    // Be conservative: if the instructions are too far apart, don't
    // move them. We want to limit the increase of register pressure.
    bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
    if (DoMove)
      DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                         MemOps, MemRegs, TRI);
      for (unsigned i = 0; i != NumMove; ++i)

      // This is the new location for the loads / stores.
      MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
      while (InsertPos != MBB->end() && MemOps.count(InsertPos))

      // If we are moving a pair of loads / stores, see if it makes sense
      // to try to allocate a pair of registers that can form register pairs.
      MachineInstr *Op0 = Ops.back();
      MachineInstr *Op1 = Ops[Ops.size()-2];
      unsigned EvenReg = 0, OddReg = 0;
      unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
      ARMCC::CondCodes Pred = ARMCC::AL;
      unsigned NewOpc = 0;
      if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                           EvenReg, OddReg, BaseReg, OffReg,
                                           Offset, PredReg, Pred, isT2)) {
        // Form the pair instruction.
        MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                          dl, TII->get(NewOpc))
          .addReg(EvenReg, RegState::Define)
          .addReg(OddReg, RegState::Define)
        MIB.addImm(Offset).addImm(Pred).addReg(PredReg);

        MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                          dl, TII->get(NewOpc))
        MIB.addImm(Offset).addImm(Pred).addReg(PredReg);

        // Add register allocation hints to form register pairs.
        MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
        MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);

      for (unsigned i = 0; i != NumMove; ++i) {
        MachineInstr *Op = Ops.back();
        MBB->splice(InsertPos, MBB, Op);

      NumLdStMoved += NumMove;
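/// RescheduleLoadStoreInstrs - Scan a basic block, numbering the instructions
/// and grouping loads and stores by base register; RescheduleOps is then run
/// over each group so that loads / stores off the same base end up adjacent
/// (and possibly paired into LDRD / STRD).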
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
          SmallVector<MachineInstr*, 4> MIs;
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);

        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
          SmallVector<MachineInstr*, 4> MIs;
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);

      // Found a duplicate (a base+offset combination that's seen earlier).

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);

    Base2LdsMap.clear();
    Base2StsMap.clear();
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}