From 0ee7f2ef5cadacbeec590c7bffc0a4363074ea5d Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Tue, 29 Sep 2015 16:07:32 +0000
Subject: [PATCH] [AArch64] Scale offsets by the size of the memory operation.
 NFC.

The immediate in the load/store should be scaled by the size of the memory
operation, not the size of the register being loaded/stored. This change
gets us one step closer to forming LDPSW instructions.

This change also enables pre- and post-indexing for halfword and byte loads
and stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248804 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../AArch64/AArch64LoadStoreOptimizer.cpp     | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 7a7688bd5ac..d767c9ec805 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -164,8 +164,8 @@ static bool isUnscaledLdSt(MachineInstr *MI) {
   return isUnscaledLdSt(MI->getOpcode());
 }
 
-// Size in bytes of the data moved by an unscaled load or store.
-static int getMemSize(MachineInstr *MI) {
+// Scaling factor for unscaled load or store.
+static int getMemScale(MachineInstr *MI) {
   switch (MI->getOpcode()) {
   default:
     llvm_unreachable("Opcode has unknown size!");
@@ -179,6 +179,10 @@ static int getMemSize(MachineInstr *MI) {
   case AArch64::STURSi:
   case AArch64::STRWui:
   case AArch64::STURWi:
+  case AArch64::LDPSi:
+  case AArch64::LDPWi:
+  case AArch64::STPSi:
+  case AArch64::STPWi:
     return 4;
   case AArch64::LDRDui:
   case AArch64::LDURDi:
@@ -188,11 +192,17 @@ static int getMemSize(MachineInstr *MI) {
   case AArch64::STURDi:
   case AArch64::STRXui:
   case AArch64::STURXi:
+  case AArch64::LDPDi:
+  case AArch64::LDPXi:
+  case AArch64::STPDi:
+  case AArch64::STPXi:
     return 8;
   case AArch64::LDRQui:
   case AArch64::LDURQi:
   case AArch64::STRQui:
   case AArch64::STURQi:
+  case AArch64::LDPQi:
+  case AArch64::STPQi:
     return 16;
   }
 }
@@ -425,7 +435,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
       SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
   bool IsUnscaled = isUnscaledLdSt(Opc);
   int OffsetStride =
-      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
+      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(I) : 1;
 
   bool MergeForward = Flags.getMergeForward();
   unsigned NewOpc = getMatchingPairOpcode(Opc);
@@ -613,7 +623,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   // range, plus allow an extra one in case we find a later insn that matches
   // with Offset-1)
   int OffsetStride =
-      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
+      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(FirstMI) : 1;
   if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
     return E;
 
@@ -774,8 +784,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
               .addImm(Value);
   } else {
     // Paired instruction.
-    const MachineFunction &MF = *I->getParent()->getParent();
-    int Scale = TII->getRegClass(I->getDesc(), 0, TRI, MF)->getSize();
+    int Scale = getMemScale(I);
     MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .addOperand(getLdStRegOp(Update))
               .addOperand(getLdStRegOp(I, 0))
@@ -840,8 +849,7 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
     // the scaling factor. The scaled offset must also fit into a signed 7-bit
     // integer.
     if (IsPairedInsn) {
-      const MachineFunction &MF = *MemMI->getParent()->getParent();
-      int Scale = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+      int Scale = getMemScale(MemMI);
       if (UpdateOffset % Scale != 0)
         break;
 
@@ -864,11 +872,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
   MachineBasicBlock::iterator E = I->getParent()->end();
   MachineInstr *MemMI = I;
   MachineBasicBlock::iterator MBBI = I;
-  const MachineFunction &MF = *MemMI->getParent()->getParent();
 
   unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
-  int Offset = getLdStOffsetOp(MemMI).getImm() *
-               TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+  int Offset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
 
   // If the base register overlaps a destination register, we can't
   // merge the update.
@@ -922,11 +928,10 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
   MachineBasicBlock::iterator E = I->getParent()->end();
   MachineInstr *MemMI = I;
   MachineBasicBlock::iterator MBBI = I;
-  const MachineFunction &MF = *MemMI->getParent()->getParent();
 
   unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
   int Offset = getLdStOffsetOp(MemMI).getImm();
-  unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+  unsigned MemSize = getMemScale(MemMI);
 
   // If the load/store is the first instruction in the block, there's obviously
   // not any matching update. Ditto if the memory offset isn't zero.
@@ -958,7 +963,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
     ++Count;
 
     // If we found a match, return it.
-    if (isMatchingUpdateInsn(I, MI, BaseReg, RegSize))
+    if (isMatchingUpdateInsn(I, MI, BaseReg, MemSize))
       return MBBI;
 
     // Update the status of what the instruction clobbered and used.
@@ -1147,9 +1152,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
       // The immediate in the load/store is scaled by the size of the register
      // being loaded. The immediate in the add we're looking for,
       // however, is not, so adjust here.
-      int Value = MI->getOperand(isPairedLdSt(MI) ? 3 : 2).getImm() *
-                  TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
-                      ->getSize();
+      int Value =
+          MI->getOperand(isPairedLdSt(MI) ? 3 : 2).getImm() * getMemScale(MI);
 
       // FIXME: The immediate in the load/store should be scaled by the size of
       // the memory operation, not the size of the register being loaded/stored.
-- 
2.34.1
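
To make the scaling rule concrete, here is a minimal standalone C++ sketch
(an editor's illustration, not LLVM code, and not part of the patch). The
opcode names only mirror AArch64 mnemonics; regSize() and memScale() are
hypothetical stand-ins for the register-class size query this patch removes
and the getMemScale() helper it adds. The interesting case is LDRSW: it
writes a 64-bit X register but moves only 4 bytes of memory, so its offset
immediate is scaled by 4, not 8.

// Why the offset immediate must be scaled by the memory-operation size,
// not the register size. Standalone sketch; names are hypothetical.
#include <cassert>
#include <cstdio>

enum Opcode { LDRXui, LDRSWui, LDPSWi };

// Size in bytes of the destination register(s).
static int regSize(Opcode Op) {
  switch (Op) {
  case LDRXui:  return 8; // one X register
  case LDRSWui: return 8; // an X register, despite the 32-bit access
  case LDPSWi:  return 8; // each of two X registers
  }
  return 0;
}

// Size in bytes of the memory access -- the correct scale factor.
static int memScale(Opcode Op) {
  switch (Op) {
  case LDRXui:  return 8; // loads 8 bytes
  case LDRSWui: return 4; // loads 4 bytes, then sign-extends to 64 bits
  case LDPSWi:  return 4; // loads 4 bytes per register
  }
  return 0;
}

int main() {
  // "ldrsw x0, [x1, #12]" is encoded with the scaled immediate 12 / 4 = 3.
  // Recovering the byte offset must multiply by the 4-byte access size;
  // multiplying by the 8-byte register size would yield base + 24 instead.
  int EncodedImm = 3;
  assert(EncodedImm * memScale(LDRSWui) == 12); // correct byte offset
  assert(EncodedImm * regSize(LDRSWui) == 24);  // the mistake being avoided
  std::printf("LDRSWui #%d -> byte offset %d\n", EncodedImm,
              EncodedImm * memScale(LDRSWui));
  return 0;
}

The same distinction is what blocks LDPSW formation before this patch: a
register-class query reports 8 bytes for the X registers an LDPSW writes,
while its 7-bit signed immediate is scaled by the 4-byte access size.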