//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";

  struct MemOpQueueEntry {
    MachineBasicBlock::iterator MBBI;
    MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
      : Offset(o), Position(p), MBBI(i), Merged(false) {}

  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;

  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      ARMCC::CondCodes Pred,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);

char ARMLoadStoreOpt::ID = 0;
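
/// getLoadStoreMultipleOpcode - Map a single load / store opcode to the
/// corresponding load / store multiple opcode (e.g. LDR -> LDM, STR -> STM).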
static int getLoadStoreMultipleOpcode(int Opcode) {
  default: llvm_unreachable("Unhandled opcode!");

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
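///
/// For example (illustrative):
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
/// =>
///   ldmia r0, {r4, r5}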
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    // Thumb2 does not support ldmib / stmib.
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    // Thumb2 does not support ldmda / stmda.
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
      // Use the scratch register as the new base.

    int BaseOpc = !isThumb2
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
      : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);

    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    BaseKill = true;  // New base is always killed right after its use.

  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
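  // Append one register operand per merged load / store: loads define the
  // register, and the kill flag recorded in Regs is carried over from the
  // original instruction.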
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               unsigned memOpsBegin,
               unsigned insertAfter,
               ARMCC::CondCodes Pred,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
    Regs.push_back(std::make_pair(Reg, isKill));

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);

    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
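///
/// The MemOps queue is scanned starting at SIndex; a run of entries whose
/// offsets increase by the access size and whose register numbers satisfy the
/// AM4/AM5 ordering rules is handed to MergeOpsUpdate, and scanning restarts
/// at the first entry that breaks the run.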
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    // Can only do up to 16 double-word registers per insn.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,

    if (MemOps[i].Position > MemOps[insertAfter].Position)

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
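
/// isMatchingDecrement - Return true if MI is a subtract of the form
/// "Base = Base - Bytes" with a matching predicate, i.e. a base decrement
/// that the base-update folding below can absorb.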
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
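
/// isMatchingIncrement - Counterpart of isMatchingDecrement: return true if
/// MI adds Bytes to Base with the same predicate, so it can be folded as a
/// positive base update.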
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
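
/// getLSMultipleTransferSize - Return the number of bytes transferred by the
/// given load / store instruction.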
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    return (MI->getNumOperands() - 4) * 4;
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
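
/// getUpdatingLSMultipleOpcode - Map an LDM/STM/VLDM/VSTM opcode to its
/// base-updating (writeback) form.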
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
/// =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
/// =>
///   ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
                Opcode == ARM::STM || Opcode == ARM::t2STM);

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

    // Can't use an updating ld/st if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());

  // Try merging with the previous instruction.
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      } else if (isAM4 && Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Try merging with the next instruction.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill));
    // [t2]LDM_UPD, [t2]STM_UPD
    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
       .addImm(Pred).addReg(PredReg);
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
       .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
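
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed (writeback) form of a
/// single load / store opcode. VLDR / VSTR have no pre-indexed form, so they
/// map to the updating VLDM / VSTM opcodes instead.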
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
    return ARM::t2LDR_PRE;
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
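
/// getPostIndexedLoadStoreOpcode - Return the post-indexed (writeback) form of
/// a single load / store opcode.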
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
    return ARM::t2LDR_POST;
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
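///
/// For example (illustrative):
///   ldr rd, [rn]
///   rn := rn + 4
/// =>
///   ldr rd, [rn], #4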
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);

  // Try merging with the next instruction.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    MachineOperand &MO = MI->getOperand(0);
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())

  int Opcode = MI->getOpcode();
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
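
/// getMemoryOpOffset - Return the signed byte offset of a load / store,
/// decoded from its addressing-mode immediate operand (AM2, AM3, AM5, or a
/// plain Thumb2 immediate).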
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)

    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
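
/// InsertLDR_STR - Emit a single load or store with the given register, base,
/// offset, and flag operands; used by FixInvalidRegPairOp when an ldrd / strd
/// has to be split into two ordinary loads / stores.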
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));

      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;

        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  unsigned CurrBase = 0;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
    if (FixInvalidRegPairOp(MBB, MBBI))

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!

      // Reached the end of the block; try merging the memory instructions.

    if (NumMemOps > 1) {
      // Try to find a free register to use as a new base in case it's needed.
      // First advance to the instruction just before the start of the chain.
      AdvanceRS(MBB, MemOps);
      // Find a scratch register.
      unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
      // Process the load / store instructions.
      RS->forward(prior(MBBI));

      MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                   CurrPred, CurrPredReg, Scratch, MemOps, Merges);

      // Try folding preceding/trailing base inc/dec into the generated
      // LDM/STM ops.
      for (unsigned i = 0, e = Merges.size(); i < e; ++i)
        if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
      NumMerges += Merges.size();

      // Try folding preceding/trailing base inc/dec into those load/store
      // that were not merged to form LDM/STM ops.
      for (unsigned i = 0; i != NumMemOps; ++i)
        if (!MemOps[i].Merged)
          if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))

      // RS may be pointing to an instruction that's deleted.
      RS->skipTo(prior(MBBI));
    } else if (NumMemOps == 1) {
      // Try folding preceding/trailing base inc/dec into the single
      // load/store.
      if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
        RS->forward(prior(MBBI));

      CurrPred = ARMCC::AL;

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {

  return NumMerges > 0;
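
/// OffsetCompare - Comparator that orders memory operations on the same base
/// by decreasing memory offset; used to sort ops before rescheduling.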
struct OffsetCompare {
  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);

/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

  const TargetData *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";

  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &OffReg, int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);

char ARMPreAllocLoadStoreOpt::ID = 0;

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    Modified |= RescheduleLoadStoreInstrs(MFI);
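
/// IsSafeAndProfitableToMove - Return true if the memory operations in MemOps
/// can be moved next to each other between I and E: nothing in between may be
/// a call, a terminator, an instruction with unmodeled side effects, a store
/// (when moving loads), or a redefinition of the base register, and the
/// estimated increase in register pressure must stay small.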
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
    if (MemOps.count(&*I))
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
    if (isLd && TID.mayStore())
      // It's not safe to move the first 'str' down.
      // str r4, [r0, #+4]
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
  return AddedRegPressure.size() <= MemRegs.size() * 2;

ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          ARMCC::CondCodes &Pred,
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
  else if (Opcode == ARM::STR)
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;

  // Make sure the offset registers match.
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))

  // Make sure the base address satisfies the i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
    // Can't fall back to t2LDRi8 / t2STRi8.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
      AddSub = ARM_AM::sub;
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
  BaseReg = Op0->getOperand(1).getReg();
  OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
      if (Loc >= LastLoc) {
      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
      LastOffset = Offset;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.

      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
        for (unsigned i = 0; i != NumMove; ++i)

        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          // Form the pair instruction.
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            MBB->splice(InsertPos, MBB, Op);

        NumLdStMoved += NumMove;
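
/// RescheduleLoadStoreInstrs - Collect the loads and stores in a block by base
/// register, then invoke RescheduleOps to move ops with the same base closer
/// together (and, where possible, form ldrd / strd pairs).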
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  for (; MBBI != E; ++MBBI) {
    MachineInstr *MI = MBBI;
    const TargetInstrDesc &TID = MI->getDesc();
    if (TID.isCall() || TID.isTerminator()) {
      // Stop at barriers.
    MI2LocMap[MI] = Loc++;
    if (!isMemoryOp(MI))
    unsigned PredReg = 0;
    if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)

    int Opc = MI->getOpcode();
    bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
    unsigned Base = MI->getOperand(1).getReg();
    int Offset = getMemoryOpOffset(MI);

    bool StopHere = false;
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2LdsMap.find(Base);
      if (BI != Base2LdsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
        SmallVector<MachineInstr*, 4> MIs;
        Base2LdsMap[Base] = MIs;
        LdBases.push_back(Base);
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2StsMap.find(Base);
      if (BI != Base2StsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
        SmallVector<MachineInstr*, 4> MIs;
        Base2StsMap[Base] = MIs;
        StBases.push_back(Base);

      // Found a duplicate (a base+offset combination that's seen earlier).

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);

    Base2LdsMap.clear();
    Base2StsMap.clear();

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();