From d421582e907886a10155191bcd9f81c31317f546 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 1 Jun 2015 23:27:08 +0000 Subject: [PATCH] ARM: Thumb2 LDRD/STRD supports independent input/output regs The existing code would unnecessarily break LDRD/STRD apart with non-adjacent registers; on thumb2 this is not necessary. Ideally on thumb2 we shouldn't match for ldrd/strd pre-regalloc anymore, as there is no reason to set register hints; changing that is something for a future patch, however. Differential Revision: http://reviews.llvm.org/D9694 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238795 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 42 +++++++++++++----------- test/CodeGen/ARM/atomic-ops-v8.ll | 14 ++++---- test/CodeGen/ARM/ldrd.ll | 16 +++++++++ test/CodeGen/Thumb2/float-ops.ll | 6 ++-- 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 10787b118b0..8570ecd617f 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1470,8 +1470,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = &*MBBI; unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::LDRD || Opcode == ARM::STRD || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { const MachineOperand &BaseOp = MI->getOperand(2); unsigned BaseReg = BaseOp.getReg(); unsigned EvenReg = MI->getOperand(0).getReg(); @@ -1949,10 +1948,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, - DebugLoc &dl, - unsigned &NewOpc, unsigned &EvenReg, - unsigned &OddReg, unsigned &BaseReg, - int &Offset, unsigned &PredReg, + DebugLoc &dl, unsigned &NewOpc, + unsigned &FirstReg, + 
unsigned &SecondReg, + unsigned &BaseReg, int &Offset, + unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2) { // Make sure we're allowed to generate LDRD/STRD. @@ -2011,9 +2011,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; Offset = ARM_AM::getAM3Opc(AddSub, OffImm); } - EvenReg = Op0->getOperand(0).getReg(); - OddReg = Op1->getOperand(0).getReg(); - if (EvenReg == OddReg) + FirstReg = Op0->getOperand(0).getReg(); + SecondReg = Op1->getOperand(0).getReg(); + if (FirstReg == SecondReg) return false; BaseReg = Op0->getOperand(1).getReg(); Pred = getInstrPredicate(Op0, PredReg); @@ -2109,7 +2109,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, // to try to allocate a pair of registers that can form register pairs. MachineInstr *Op0 = Ops.back(); MachineInstr *Op1 = Ops[Ops.size()-2]; - unsigned EvenReg = 0, OddReg = 0; + unsigned FirstReg = 0, SecondReg = 0; unsigned BaseReg = 0, PredReg = 0; ARMCC::CondCodes Pred = ARMCC::AL; bool isT2 = false; @@ -2117,21 +2117,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, int Offset = 0; DebugLoc dl; if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, - EvenReg, OddReg, BaseReg, + FirstReg, SecondReg, BaseReg, Offset, PredReg, Pred, isT2)) { Ops.pop_back(); Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); - MRI->constrainRegClass(EvenReg, TRC); - MRI->constrainRegClass(OddReg, TRC); + MRI->constrainRegClass(FirstReg, TRC); + MRI->constrainRegClass(SecondReg, TRC); // Form the pair instruction. 
if (isLd) { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg, RegState::Define) - .addReg(OddReg, RegState::Define) + .addReg(FirstReg, RegState::Define) + .addReg(SecondReg, RegState::Define) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -2144,8 +2144,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, ++NumLDRDFormed; } else { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg) - .addReg(OddReg) + .addReg(FirstReg) + .addReg(SecondReg) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -2160,9 +2160,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MBB->erase(Op0); MBB->erase(Op1); - // Add register allocation hints to form register pairs. - MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); - MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); + if (!isT2) { + // Add register allocation hints to form register pairs. + MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); + MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); + } } else { for (unsigned i = 0; i != NumMove; ++i) { MachineInstr *Op = Ops.back(); diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll index db5007b0758..86287c1178d 100644 --- a/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/test/CodeGen/ARM/atomic-ops-v8.ll @@ -664,7 +664,7 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 @@ -782,7 +782,7 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 @@ -900,7 +900,7 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 @@ -1018,7 +1018,7 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 @@ -1146,10 +1146,12 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; function there. 
; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 ; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1 -; CHECK-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] +; CHECK-ARM-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] +; CHECK-THUMB-LE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1 ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 -; CHECK-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] +; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] +; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]] ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: BB#2: ; As above, r2, r3 is a reasonable guess. diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index 7ce846844e0..a8070ea68aa 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -92,6 +92,22 @@ entry: ret void } +declare void @extfunc(i32, i32, i32, i32) + +; CHECK-LABEL: Func2: +; A8: ldrd +; A8: blx +; A8: pop +define void @Func2(i32* %p) { +entry: + %addr0 = getelementptr i32, i32* %p, i32 0 + %addr1 = getelementptr i32, i32* %p, i32 1 + %v0 = load i32, i32* %addr0 + %v1 = load i32, i32* %addr1 + ; try to force %v0/%v1 into non-adjacent registers + call void @extfunc(i32 %v0, i32 0, i32 0, i32 %v1) + ret void +} declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind diff --git a/test/CodeGen/Thumb2/float-ops.ll b/test/CodeGen/Thumb2/float-ops.ll index 7ec08f86665..4c42908ce13 100644 --- a/test/CodeGen/Thumb2/float-ops.ll +++ b/test/CodeGen/Thumb2/float-ops.ll @@ -109,7 +109,7 @@ entry: define double @load_d(double* %a) { entry: ; CHECK-LABEL: load_d: -; NONE: ldm r0, {r0, r1} +; NONE: ldrd r0, r1, [r0] ; HARD: vldr d0, [r0] %0 = load double, double* %a, align 8 ret 
double %0 @@ -127,9 +127,7 @@ entry: define void @store_d(double* %a, double %b) { entry: ; CHECK-LABEL: store_d: -; NONE: mov r1, r3 -; NONE: str r2, [r0] -; NONE: str r1, [r0, #4] +; NONE: strd r2, r3, [r0] ; HARD: vstr d0, [r0] store double %b, double* %a, align 8 ret void -- 2.34.1