//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        MemOpQueue &MemOps,
                        unsigned memOpsFrom,
                        unsigned memOpsTo,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
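///
/// For example (illustrative only; the registers involved depend on the
/// input code):
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, <r4, r5, r6>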
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert an MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right at its use.
  }

  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MachineBasicBlock::iterator MBBI,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               MemOpQueue &MemOps,
               unsigned memOpsFrom,
               unsigned memOpsTo,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsFrom; i < memOpsTo; ++i) {
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    Regs.push_back(std::make_pair(MO.getReg(), MO.isKill()));
  }

  if (!MergeOps(MBB, MBBI, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(MBBI));
  for (unsigned i = memOpsFrom; i < memOpsTo; ++i) {
    MBB.erase(MemOps[i].MBBI);
    MemOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
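///
/// Note (illustrative): AM4 (ldm / stm) requires register numbers in
/// ascending order, so e.g. "ldr r3, [r0]" followed by "ldr r1, [r0, #4]"
/// cannot form a single ldmia; the queue is split at that point and each
/// run is merged separately.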
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                     Scratch, dl, MemOps, SIndex, i, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
                 Scratch, dl, MemOps, SIndex, MemOps.size(), Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

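// For example (illustrative): after "ldmia r0, {r1, r2, r3}" transfers 12
// bytes, a following "add r0, r0, #12" with the same predicate is a matching
// increment that the callers below can fold into a writeback form.
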
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base);
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
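///
/// For example (illustrative):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4   (post-indexed load with base writeback)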
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // VLDMS, VLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // VSTMS, VSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

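/// FixInvalidRegPairOp - ldrd / strd require an even / odd physical register
/// pair. If this instruction does not have such a pair, rewrite it as an
/// ldm / stm or as two single loads / stores, e.g. (illustrative; r1 is
/// odd-numbered, so the pair is illegal for ldrd but fine for ldm):
///   ldrd r1, r2, [r0]  =>  ldmia r0, {r1, r2}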
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
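///
/// The scan below keeps a queue of (offset, position) entries for the current
/// (base, opcode, predicate) chain, and flushes it through MergeLDR_STR when
/// the chain is broken by a base clobber, a mismatched op, or the block end.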
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}

/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

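/// CanFormLdStDWord - Decide whether two i32 loads / stores from the same
/// base can become a single ldrd / strd, subject to ISA (v5TE+), alignment,
/// offset-range, and distinct-register constraints. E.g. (illustrative):
///   ldr r0, [r2]
///   ldr r1, [r2, #4]
/// =>
///   ldrd r0, r1, [r2]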
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

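/// RescheduleOps - Move loads / stores of the same base register close
/// together so later passes can merge them; when exactly two qualify, try to
/// form an ldrd / strd directly and add register allocation hints for an
/// even / odd pair.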
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}