//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
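// Illustrative example (hypothetical input): given a run of word loads off a
// common base,
//   ldr r1, [r0]
//   ldr r2, [r0, #4]
//   ldr r3, [r0, #8]
// this pass is expected to rewrite the sequence as a single load-multiple:
//   ldmia r0, {r1, r2, r3}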
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // The new base is always killed right after its use.
  }
  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
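// Sketch of the expected output (hypothetical values): for Regs = {r4, r5, r6},
// Base = r0 and Offset = 0, MergeOps should emit
//   ldmia r0, {r4, r5, r6}
// while a nonzero starting offset first materializes a new base, e.g.
//   add r6, r0, #8
//   ldmia r6, {r4, r5, r6}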
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;
  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }

    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
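// Hypothetical example: with Base = sp and Bytes = 8, a matching decrement
// would be "sub sp, sp, #8" guarded by the same predicate - exactly the kind
// of adjustment a writeback sub-mode can absorb.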
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
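// Note: the "- 5" above assumes an LDM/STM carries five leading non-list
// operands (base register, mode immediate, the two predicate operands and the
// optional writeback operand) before the transferred registers.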
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
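// Note: VLDRS / VLDRD map to VLDMS / VLDMD here rather than to dedicated
// pre/post-indexed opcodes; the writeback form of the multiple op appears to
// play that role for the AM5 (VFP) loads and stores in this pass.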
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // VLDMS, VLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // VSTMS, VSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
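// Hypothetical example: "ldr r0, [r1, r2]" is rejected above because its
// offset-register operand is nonzero, and a register offset has no
// counterpart in the ldm / stm encodings this pass forms.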
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
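// Worked example (hypothetical input): for "ldr r0, [r1, #-8]" the AM2 offset
// field decodes to a magnitude of 8 with a sub opcode, so this helper should
// return -8; the Thumb2 i8/i12 forms store the signed value directly.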
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}
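// Illustrative example (hypothetical input): "ldrd r1, r0, [r2]" names a
// register pair the instruction cannot actually encode, so the code above
// would split it into "ldr r1, [r2]" and "ldr r0, [r2, #4]".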
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
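// Note: this comparator sorts offsets in descending order, so after std::sort
// the op with the smallest offset sits at the back of the vector, which is
// where RescheduleOps pops entries from.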
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if available.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
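// Note: packing loads / stores together tends to lengthen the live ranges of
// the values they define or use, which is why the heuristic above caps the
// tolerated growth of AddedRegPressure relative to MemRegs.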
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
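// Hypothetical example: two adjacent word loads such as
//   ldr r0, [r4]
//   ldr r1, [r4, #4]
// should pass these checks on a v5TE+ target with a suitably aligned base,
// letting RescheduleOps emit "ldrd r0, r1, [r4]" instead.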
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();
          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);
      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }
    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}