1 //=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #include "AArch64InstrInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetInstrInfo.h"
29 #include "llvm/Target/TargetMachine.h"
30 #include "llvm/Target/TargetRegisterInfo.h"
33 #define DEBUG_TYPE "aarch64-ldst-opt"
35 /// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine
36 /// load / store instructions to form ldp / stp instructions.
38 STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
39 STATISTIC(NumPostFolded, "Number of post-index updates folded");
40 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
41 STATISTIC(NumUnscaledPairCreated,
42 "Number of load/store from unscaled generated");
44 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
45 cl::init(20), cl::Hidden);
47 // Place holder while testing unscaled load/store combining
48 static cl::opt<bool> EnableAArch64UnscaledMemOp(
49 "aarch64-unscaled-mem-op", cl::Hidden,
50 cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
53 struct AArch64LoadStoreOpt : public MachineFunctionPass {
55 AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
57 const AArch64InstrInfo *TII;
58 const TargetRegisterInfo *TRI;
60 // Scan the instructions looking for a load/store that can be combined
61 // with the current instruction into a load/store pair.
62 // Return the matching instruction if one is found, else MBB->end().
63 // If a matching instruction is found, MergeForward is set to true if the
64 // merge is to remove the first instruction and replace the second with
65 // a pair-wise insn, and false if the reverse is true.
66 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
69 // Merge the two instructions indicated into a single pair-wise instruction.
70 // If MergeForward is true, erase the first instruction and fold its
71 // operation into the second. If false, the reverse. Return the instruction
72 // following the first instruction (which may change during processing).
73 MachineBasicBlock::iterator
74 mergePairedInsns(MachineBasicBlock::iterator I,
75 MachineBasicBlock::iterator Paired, bool MergeForward);
77 // Scan the instruction list to find a base register update that can
78 // be combined with the current instruction (a load or store) using
79 // pre or post indexed addressing with writeback. Scan forwards.
80 MachineBasicBlock::iterator
81 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
84 // Scan the instruction list to find a base register update that can
85 // be combined with the current instruction (a load or store) using
86 // pre or post indexed addressing with writeback. Scan backwards.
87 MachineBasicBlock::iterator
88 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
90 // Merge a pre-index base register update into a ld/st instruction.
91 MachineBasicBlock::iterator
92 mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
93 MachineBasicBlock::iterator Update);
95 // Merge a post-index base register update into a ld/st instruction.
96 MachineBasicBlock::iterator
97 mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
98 MachineBasicBlock::iterator Update);
100 bool optimizeBlock(MachineBasicBlock &MBB);
102 bool runOnMachineFunction(MachineFunction &Fn) override;
104 const char *getPassName() const override {
105 return "AArch64 load / store optimization pass";
109 int getMemSize(MachineInstr *MemMI);
111 char AArch64LoadStoreOpt::ID = 0;
114 static bool isUnscaledLdst(unsigned Opc) {
118 case AArch64::STURSi:
120 case AArch64::STURDi:
122 case AArch64::STURQi:
124 case AArch64::STURWi:
126 case AArch64::STURXi:
128 case AArch64::LDURSi:
130 case AArch64::LDURDi:
132 case AArch64::LDURQi:
134 case AArch64::LDURWi:
136 case AArch64::LDURXi:
138 case AArch64::LDURSWi:
143 // Size in bytes of the data moved by an unscaled load or store
144 int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
145 switch (MemMI->getOpcode()) {
147 llvm_unreachable("Opcode has unknown size!");
148 case AArch64::STRSui:
149 case AArch64::STURSi:
151 case AArch64::STRDui:
152 case AArch64::STURDi:
154 case AArch64::STRQui:
155 case AArch64::STURQi:
157 case AArch64::STRWui:
158 case AArch64::STURWi:
160 case AArch64::STRXui:
161 case AArch64::STURXi:
163 case AArch64::LDRSui:
164 case AArch64::LDURSi:
166 case AArch64::LDRDui:
167 case AArch64::LDURDi:
169 case AArch64::LDRQui:
170 case AArch64::LDURQi:
172 case AArch64::LDRWui:
173 case AArch64::LDURWi:
175 case AArch64::LDRXui:
176 case AArch64::LDURXi:
178 case AArch64::LDRSWui:
179 case AArch64::LDURSWi:
184 static unsigned getMatchingPairOpcode(unsigned Opc) {
187 llvm_unreachable("Opcode has no pairwise equivalent!");
188 case AArch64::STRSui:
189 case AArch64::STURSi:
190 return AArch64::STPSi;
191 case AArch64::STRDui:
192 case AArch64::STURDi:
193 return AArch64::STPDi;
194 case AArch64::STRQui:
195 case AArch64::STURQi:
196 return AArch64::STPQi;
197 case AArch64::STRWui:
198 case AArch64::STURWi:
199 return AArch64::STPWi;
200 case AArch64::STRXui:
201 case AArch64::STURXi:
202 return AArch64::STPXi;
203 case AArch64::LDRSui:
204 case AArch64::LDURSi:
205 return AArch64::LDPSi;
206 case AArch64::LDRDui:
207 case AArch64::LDURDi:
208 return AArch64::LDPDi;
209 case AArch64::LDRQui:
210 case AArch64::LDURQi:
211 return AArch64::LDPQi;
212 case AArch64::LDRWui:
213 case AArch64::LDURWi:
214 return AArch64::LDPWi;
215 case AArch64::LDRXui:
216 case AArch64::LDURXi:
217 return AArch64::LDPXi;
218 case AArch64::LDRSWui:
219 case AArch64::LDURSWi:
220 return AArch64::LDPSWi;
224 static unsigned getPreIndexedOpcode(unsigned Opc) {
227 llvm_unreachable("Opcode has no pre-indexed equivalent!");
228 case AArch64::STRSui:
229 return AArch64::STRSpre;
230 case AArch64::STRDui:
231 return AArch64::STRDpre;
232 case AArch64::STRQui:
233 return AArch64::STRQpre;
234 case AArch64::STRWui:
235 return AArch64::STRWpre;
236 case AArch64::STRXui:
237 return AArch64::STRXpre;
238 case AArch64::LDRSui:
239 return AArch64::LDRSpre;
240 case AArch64::LDRDui:
241 return AArch64::LDRDpre;
242 case AArch64::LDRQui:
243 return AArch64::LDRQpre;
244 case AArch64::LDRWui:
245 return AArch64::LDRWpre;
246 case AArch64::LDRXui:
247 return AArch64::LDRXpre;
248 case AArch64::LDRSWui:
249 return AArch64::LDRSWpre;
253 static unsigned getPostIndexedOpcode(unsigned Opc) {
256 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
257 case AArch64::STRSui:
258 return AArch64::STRSpost;
259 case AArch64::STRDui:
260 return AArch64::STRDpost;
261 case AArch64::STRQui:
262 return AArch64::STRQpost;
263 case AArch64::STRWui:
264 return AArch64::STRWpost;
265 case AArch64::STRXui:
266 return AArch64::STRXpost;
267 case AArch64::LDRSui:
268 return AArch64::LDRSpost;
269 case AArch64::LDRDui:
270 return AArch64::LDRDpost;
271 case AArch64::LDRQui:
272 return AArch64::LDRQpost;
273 case AArch64::LDRWui:
274 return AArch64::LDRWpost;
275 case AArch64::LDRXui:
276 return AArch64::LDRXpost;
277 case AArch64::LDRSWui:
278 return AArch64::LDRSWpost;
282 MachineBasicBlock::iterator
283 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
284 MachineBasicBlock::iterator Paired,
286 MachineBasicBlock::iterator NextI = I;
288 // If NextI is the second of the two instructions to be merged, we need
289 // to skip one further. Either way we merge will invalidate the iterator,
290 // and we don't need to scan the new instruction, as it's a pairwise
291 // instruction, which we're not considering for further action anyway.
295 bool IsUnscaled = isUnscaledLdst(I->getOpcode());
297 IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
299 unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
300 // Insert our new paired instruction after whichever of the paired
301 // instructions MergeForward indicates.
302 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
303 // Also based on MergeForward is from where we copy the base register operand
304 // so we get the flags compatible with the input code.
305 MachineOperand &BaseRegOp =
306 MergeForward ? Paired->getOperand(1) : I->getOperand(1);
308 // Which register is Rt and which is Rt2 depends on the offset order.
309 MachineInstr *RtMI, *Rt2MI;
310 if (I->getOperand(2).getImm() ==
311 Paired->getOperand(2).getImm() + OffsetStride) {
319 int OffsetImm = RtMI->getOperand(2).getImm();
320 if (IsUnscaled && EnableAArch64UnscaledMemOp)
321 OffsetImm /= OffsetStride;
323 // Construct the new instruction.
324 MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
325 I->getDebugLoc(), TII->get(NewOpc))
326 .addOperand(RtMI->getOperand(0))
327 .addOperand(Rt2MI->getOperand(0))
328 .addOperand(BaseRegOp)
332 // FIXME: Do we need/want to copy the mem operands from the source
333 // instructions? Probably. What uses them after this?
335 DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
336 DEBUG(I->print(dbgs()));
337 DEBUG(dbgs() << " ");
338 DEBUG(Paired->print(dbgs()));
339 DEBUG(dbgs() << " with instruction:\n ");
340 DEBUG(((MachineInstr *)MIB)->print(dbgs()));
341 DEBUG(dbgs() << "\n");
343 // Erase the old instructions.
344 I->eraseFromParent();
345 Paired->eraseFromParent();
350 /// trackRegDefsUses - Remember what registers the specified instruction uses
352 static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
354 const TargetRegisterInfo *TRI) {
355 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
356 MachineOperand &MO = MI->getOperand(i);
358 ModifiedRegs.setBitsNotInMask(MO.getRegMask());
362 unsigned Reg = MO.getReg();
364 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
365 ModifiedRegs.set(*AI);
367 assert(MO.isUse() && "Reg operand not a def and not a use?!?");
368 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
374 static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
375 if (!IsUnscaled && (Offset > 63 || Offset < -64))
378 // Convert the byte-offset used by unscaled into an "element" offset used
379 // by the scaled pair load/store instructions.
380 int ElemOffset = Offset / OffsetStride;
381 if (ElemOffset > 63 || ElemOffset < -64)
387 // Do alignment, specialized to power of 2 and for signed ints,
388 // avoiding having to do a C-style cast from uint_64t to int when
389 // using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
390 // FIXME: Move this function to include/MathExtras.h?
391 static int alignTo(int Num, int PowOf2) {
392 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
395 /// findMatchingInsn - Scan the instructions looking for a load/store that can
396 /// be combined with the current instruction into a load/store pair.
397 MachineBasicBlock::iterator
398 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
399 bool &MergeForward, unsigned Limit) {
400 MachineBasicBlock::iterator E = I->getParent()->end();
401 MachineBasicBlock::iterator MBBI = I;
402 MachineInstr *FirstMI = I;
405 int Opc = FirstMI->getOpcode();
406 bool MayLoad = FirstMI->mayLoad();
407 bool IsUnscaled = isUnscaledLdst(Opc);
408 unsigned Reg = FirstMI->getOperand(0).getReg();
409 unsigned BaseReg = FirstMI->getOperand(1).getReg();
410 int Offset = FirstMI->getOperand(2).getImm();
412 // Early exit if the first instruction modifies the base register.
413 // e.g., ldr x0, [x0]
414 // Early exit if the offset if not possible to match. (6 bits of positive
415 // range, plus allow an extra one in case we find a later insn that matches
417 if (FirstMI->modifiesRegister(BaseReg, TRI))
420 IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
421 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
424 // Track which registers have been modified and used between the first insn
425 // (inclusive) and the second insn.
426 BitVector ModifiedRegs, UsedRegs;
427 ModifiedRegs.resize(TRI->getNumRegs());
428 UsedRegs.resize(TRI->getNumRegs());
429 for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
430 MachineInstr *MI = MBBI;
431 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
432 // optimization by changing how far we scan.
433 if (MI->isDebugValue())
436 // Now that we know this is a real instruction, count it.
439 if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
440 // If we've found another instruction with the same opcode, check to see
441 // if the base and offset are compatible with our starting instruction.
442 // These instructions all have scaled immediate operands, so we just
443 // check for +1/-1. Make sure to check the new instruction offset is
444 // actually an immediate and not a symbolic reference destined for
447 // Pairwise instructions have a 7-bit signed offset field. Single insns
448 // have a 12-bit unsigned offset field. To be a valid combine, the
449 // final offset must be in range.
450 unsigned MIBaseReg = MI->getOperand(1).getReg();
451 int MIOffset = MI->getOperand(2).getImm();
452 if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
453 (Offset + OffsetStride == MIOffset))) {
454 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
455 // If this is a volatile load/store that otherwise matched, stop looking
456 // as something is going on that we don't have enough information to
457 // safely transform. Similarly, stop if we see a hint to avoid pairs.
458 if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
460 // If the resultant immediate offset of merging these instructions
461 // is out of range for a pairwise instruction, bail and keep looking.
462 bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
463 if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
464 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
467 // If the alignment requirements of the paired (scaled) instruction
468 // can't express the offset of the unscaled input, bail and keep
470 if (IsUnscaled && EnableAArch64UnscaledMemOp &&
471 (alignTo(MinOffset, OffsetStride) != MinOffset)) {
472 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
475 // If the destination register of the loads is the same register, bail
476 // and keep looking. A load-pair instruction with both destination
477 // registers the same is UNPREDICTABLE and will result in an exception.
478 if (MayLoad && Reg == MI->getOperand(0).getReg()) {
479 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
483 // If the Rt of the second instruction was not modified or used between
484 // the two instructions, we can combine the second into the first.
485 if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
486 !UsedRegs[MI->getOperand(0).getReg()]) {
487 MergeForward = false;
491 // Likewise, if the Rt of the first instruction is not modified or used
492 // between the two instructions, we can combine the first into the
494 if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
495 !UsedRegs[FirstMI->getOperand(0).getReg()]) {
499 // Unable to combine these instructions due to interference in between.
504 // If the instruction wasn't a matching load or store, but does (or can)
505 // modify memory, stop searching, as we don't have alias analysis or
506 // anything like that to tell us whether the access is tromping on the
507 // locations we care about. The big one we want to catch is calls.
509 // FIXME: Theoretically, we can do better than that for SP and FP based
510 // references since we can effectively know where those are touching. It's
511 // unclear if it's worth the extra code, though. Most paired instructions
512 // will be sequential, perhaps with a few intervening non-memory related
514 if (MI->mayStore() || MI->isCall())
516 // Likewise, if we're matching a store instruction, we don't want to
517 // move across a load, as it may be reading the same location.
518 if (FirstMI->mayStore() && MI->mayLoad())
521 // Update modified / uses register lists.
522 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
524 // Otherwise, if the base register is modified, we have no match, so
526 if (ModifiedRegs[BaseReg])
532 MachineBasicBlock::iterator
533 AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
534 MachineBasicBlock::iterator Update) {
535 assert((Update->getOpcode() == AArch64::ADDXri ||
536 Update->getOpcode() == AArch64::SUBXri) &&
537 "Unexpected base register update instruction to merge!");
538 MachineBasicBlock::iterator NextI = I;
539 // Return the instruction following the merged instruction, which is
540 // the instruction following our unmerged load. Unless that's the add/sub
541 // instruction we're merging, in which case it's the one after that.
542 if (++NextI == Update)
545 int Value = Update->getOperand(2).getImm();
546 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
547 "Can't merge 1 << 12 offset into pre-indexed load / store");
548 if (Update->getOpcode() == AArch64::SUBXri)
551 unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
552 MachineInstrBuilder MIB =
553 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
554 .addOperand(Update->getOperand(0))
555 .addOperand(I->getOperand(0))
556 .addOperand(I->getOperand(1))
560 DEBUG(dbgs() << "Creating pre-indexed load/store.");
561 DEBUG(dbgs() << " Replacing instructions:\n ");
562 DEBUG(I->print(dbgs()));
563 DEBUG(dbgs() << " ");
564 DEBUG(Update->print(dbgs()));
565 DEBUG(dbgs() << " with instruction:\n ");
566 DEBUG(((MachineInstr *)MIB)->print(dbgs()));
567 DEBUG(dbgs() << "\n");
569 // Erase the old instructions for the block.
570 I->eraseFromParent();
571 Update->eraseFromParent();
576 MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
577 MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) {
578 assert((Update->getOpcode() == AArch64::ADDXri ||
579 Update->getOpcode() == AArch64::SUBXri) &&
580 "Unexpected base register update instruction to merge!");
581 MachineBasicBlock::iterator NextI = I;
582 // Return the instruction following the merged instruction, which is
583 // the instruction following our unmerged load. Unless that's the add/sub
584 // instruction we're merging, in which case it's the one after that.
585 if (++NextI == Update)
588 int Value = Update->getOperand(2).getImm();
589 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
590 "Can't merge 1 << 12 offset into post-indexed load / store");
591 if (Update->getOpcode() == AArch64::SUBXri)
594 unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
595 MachineInstrBuilder MIB =
596 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
597 .addOperand(Update->getOperand(0))
598 .addOperand(I->getOperand(0))
599 .addOperand(I->getOperand(1))
603 DEBUG(dbgs() << "Creating post-indexed load/store.");
604 DEBUG(dbgs() << " Replacing instructions:\n ");
605 DEBUG(I->print(dbgs()));
606 DEBUG(dbgs() << " ");
607 DEBUG(Update->print(dbgs()));
608 DEBUG(dbgs() << " with instruction:\n ");
609 DEBUG(((MachineInstr *)MIB)->print(dbgs()));
610 DEBUG(dbgs() << "\n");
612 // Erase the old instructions for the block.
613 I->eraseFromParent();
614 Update->eraseFromParent();
619 static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
621 switch (MI->getOpcode()) {
624 case AArch64::SUBXri:
625 // Negate the offset for a SUB instruction.
628 case AArch64::ADDXri:
629 // Make sure it's a vanilla immediate operand, not a relocation or
630 // anything else we can't handle.
631 if (!MI->getOperand(2).isImm())
633 // Watch out for 1 << 12 shifted value.
634 if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
636 // If the instruction has the base register as source and dest and the
637 // immediate will fit in a signed 9-bit integer, then we have a match.
638 if (MI->getOperand(0).getReg() == BaseReg &&
639 MI->getOperand(1).getReg() == BaseReg &&
640 MI->getOperand(2).getImm() <= 255 &&
641 MI->getOperand(2).getImm() >= -256) {
642 // If we have a non-zero Offset, we check that it matches the amount
643 // we're adding to the register.
644 if (!Offset || Offset == MI->getOperand(2).getImm())
652 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
653 MachineBasicBlock::iterator I, unsigned Limit, int Value) {
654 MachineBasicBlock::iterator E = I->getParent()->end();
655 MachineInstr *MemMI = I;
656 MachineBasicBlock::iterator MBBI = I;
657 const MachineFunction &MF = *MemMI->getParent()->getParent();
659 unsigned DestReg = MemMI->getOperand(0).getReg();
660 unsigned BaseReg = MemMI->getOperand(1).getReg();
661 int Offset = MemMI->getOperand(2).getImm() *
662 TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
664 // If the base register overlaps the destination register, we can't
666 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
669 // Scan forward looking for post-index opportunities.
670 // Updating instructions can't be formed if the memory insn already
671 // has an offset other than the value we're looking for.
675 // Track which registers have been modified and used between the first insn
676 // (inclusive) and the second insn.
677 BitVector ModifiedRegs, UsedRegs;
678 ModifiedRegs.resize(TRI->getNumRegs());
679 UsedRegs.resize(TRI->getNumRegs());
681 for (unsigned Count = 0; MBBI != E; ++MBBI) {
682 MachineInstr *MI = MBBI;
683 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
684 // optimization by changing how far we scan.
685 if (MI->isDebugValue())
688 // Now that we know this is a real instruction, count it.
691 // If we found a match, return it.
692 if (isMatchingUpdateInsn(MI, BaseReg, Value))
695 // Update the status of what the instruction clobbered and used.
696 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
698 // Otherwise, if the base register is used or modified, we have no match, so
700 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
706 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
707 MachineBasicBlock::iterator I, unsigned Limit) {
708 MachineBasicBlock::iterator B = I->getParent()->begin();
709 MachineBasicBlock::iterator E = I->getParent()->end();
710 MachineInstr *MemMI = I;
711 MachineBasicBlock::iterator MBBI = I;
712 const MachineFunction &MF = *MemMI->getParent()->getParent();
714 unsigned DestReg = MemMI->getOperand(0).getReg();
715 unsigned BaseReg = MemMI->getOperand(1).getReg();
716 int Offset = MemMI->getOperand(2).getImm();
717 unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
719 // If the load/store is the first instruction in the block, there's obviously
720 // not any matching update. Ditto if the memory offset isn't zero.
721 if (MBBI == B || Offset != 0)
723 // If the base register overlaps the destination register, we can't
725 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
728 // Track which registers have been modified and used between the first insn
729 // (inclusive) and the second insn.
730 BitVector ModifiedRegs, UsedRegs;
731 ModifiedRegs.resize(TRI->getNumRegs());
732 UsedRegs.resize(TRI->getNumRegs());
734 for (unsigned Count = 0; MBBI != B; --MBBI) {
735 MachineInstr *MI = MBBI;
736 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
737 // optimization by changing how far we scan.
738 if (MI->isDebugValue())
741 // Now that we know this is a real instruction, count it.
744 // If we found a match, return it.
745 if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
748 // Update the status of what the instruction clobbered and used.
749 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
751 // Otherwise, if the base register is used or modified, we have no match, so
753 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
759 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
760 bool Modified = false;
761 // Two tranformations to do here:
762 // 1) Find loads and stores that can be merged into a single load or store
769 // 2) Find base register updates that can be merged into the load or store
770 // as a base-reg writeback.
777 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
779 MachineInstr *MI = MBBI;
780 switch (MI->getOpcode()) {
782 // Just move on to the next instruction.
785 case AArch64::STRSui:
786 case AArch64::STRDui:
787 case AArch64::STRQui:
788 case AArch64::STRXui:
789 case AArch64::STRWui:
790 case AArch64::LDRSui:
791 case AArch64::LDRDui:
792 case AArch64::LDRQui:
793 case AArch64::LDRXui:
794 case AArch64::LDRWui:
795 case AArch64::LDRSWui:
796 // do the unscaled versions as well
797 case AArch64::STURSi:
798 case AArch64::STURDi:
799 case AArch64::STURQi:
800 case AArch64::STURWi:
801 case AArch64::STURXi:
802 case AArch64::LDURSi:
803 case AArch64::LDURDi:
804 case AArch64::LDURQi:
805 case AArch64::LDURWi:
806 case AArch64::LDURXi:
807 case AArch64::LDURSWi: {
808 // If this is a volatile load/store, don't mess with it.
809 if (MI->hasOrderedMemoryRef()) {
813 // Make sure this is a reg+imm (as opposed to an address reloc).
814 if (!MI->getOperand(2).isImm()) {
818 // Check if this load/store has a hint to avoid pair formation.
819 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
820 if (TII->isLdStPairSuppressed(MI)) {
824 // Look ahead up to ScanLimit instructions for a pairable instruction.
825 bool MergeForward = false;
826 MachineBasicBlock::iterator Paired =
827 findMatchingInsn(MBBI, MergeForward, ScanLimit);
829 // Merge the loads into a pair. Keeping the iterator straight is a
830 // pain, so we let the merge routine tell us what the next instruction
831 // is after it's done mucking about.
832 MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
836 if (isUnscaledLdst(MI->getOpcode()))
837 ++NumUnscaledPairCreated;
843 // FIXME: Do the other instructions.
847 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
849 MachineInstr *MI = MBBI;
850 // Do update merging. It's simpler to keep this separate from the above
851 // switch, though not strictly necessary.
852 int Opc = MI->getOpcode();
855 // Just move on to the next instruction.
858 case AArch64::STRSui:
859 case AArch64::STRDui:
860 case AArch64::STRQui:
861 case AArch64::STRXui:
862 case AArch64::STRWui:
863 case AArch64::LDRSui:
864 case AArch64::LDRDui:
865 case AArch64::LDRQui:
866 case AArch64::LDRXui:
867 case AArch64::LDRWui:
868 // do the unscaled versions as well
869 case AArch64::STURSi:
870 case AArch64::STURDi:
871 case AArch64::STURQi:
872 case AArch64::STURWi:
873 case AArch64::STURXi:
874 case AArch64::LDURSi:
875 case AArch64::LDURDi:
876 case AArch64::LDURQi:
877 case AArch64::LDURWi:
878 case AArch64::LDURXi: {
879 // Make sure this is a reg+imm (as opposed to an address reloc).
880 if (!MI->getOperand(2).isImm()) {
884 // Look ahead up to ScanLimit instructions for a mergable instruction.
885 MachineBasicBlock::iterator Update =
886 findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
888 // Merge the update into the ld/st.
889 MBBI = mergePostIdxUpdateInsn(MBBI, Update);
894 // Don't know how to handle pre/post-index versions, so move to the next
896 if (isUnscaledLdst(Opc)) {
901 // Look back to try to find a pre-index instruction. For example,
906 Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
908 // Merge the update into the ld/st.
909 MBBI = mergePreIdxUpdateInsn(MBBI, Update);
915 // Look forward to try to find a post-index instruction. For example,
919 // ldr x1, [x0, #64]!
921 // The immediate in the load/store is scaled by the size of the register
922 // being loaded. The immediate in the add we're looking for,
923 // however, is not, so adjust here.
924 int Value = MI->getOperand(2).getImm() *
925 TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
927 Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
929 // Merge the update into the ld/st.
930 MBBI = mergePreIdxUpdateInsn(MBBI, Update);
936 // Nothing found. Just move to the next instruction.
940 // FIXME: Do the other instructions.
947 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
948 const TargetMachine &TM = Fn.getTarget();
949 TII = static_cast<const AArch64InstrInfo *>(
950 TM.getSubtargetImpl()->getInstrInfo());
951 TRI = TM.getSubtargetImpl()->getRegisterInfo();
953 bool Modified = false;
955 Modified |= optimizeBlock(MBB);
960 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
961 // loads and stores near one another?
963 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
964 /// optimization pass.
965 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
966 return new AArch64LoadStoreOpt();