From 815baebe1c8dc02accf128ae10dff9a1742d3244 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Sat, 13 Mar 2010 01:08:20 +0000 Subject: [PATCH] Change ARM ld/st multiple instructions to have variant instructions for writebacks to the address register. This gets rid of the hack that the first register on the list was the magic writeback register operand. There was an implicit constraint that if that operand was not reg0 it had to match the base register operand. The post-RA scheduler's antidependency breaker did not understand that constraint and sometimes changed one without the other. This also fixes Radar 7495976 and should help the verifier work better for ARM code. There are now new ld/st instructions with explicit writeback operands and explicit constraints that tie those registers together. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98409 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMConstantIslandPass.cpp | 7 +- lib/Target/ARM/ARMInstrFormats.td | 24 +-- lib/Target/ARM/ARMInstrInfo.td | 47 +++-- lib/Target/ARM/ARMInstrThumb.td | 35 ++-- lib/Target/ARM/ARMInstrThumb2.td | 57 ++++-- lib/Target/ARM/ARMInstrVFP.td | 52 ++++-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 183 +++++++++++--------- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 7 +- lib/Target/ARM/Thumb1InstrInfo.cpp | 2 - lib/Target/ARM/Thumb1RegisterInfo.cpp | 5 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 51 ++++-- 11 files changed, 291 insertions(+), 179 deletions(-) diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 8fa3c04b6c4..1c5bd42d63d 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1473,11 +1473,10 @@ bool ARMConstantIslands::UndoLRSpillRestore() { bool MadeChange = false; for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { MachineInstr *MI = PushPopMIs[i]; - // First two operands are predicates, the third is a zero since there - // is no writeback. 
+ // First two operands are predicates. if (MI->getOpcode() == ARM::tPOP_RET && - MI->getOperand(3).getReg() == ARM::PC && - MI->getNumExplicitOperands() == 4) { + MI->getOperand(2).getReg() == ARM::PC && + MI->getNumExplicitOperands() == 3) { BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)); MI->eraseFromParent(); MadeChange = true; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index f997dd591bc..84eca85192c 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -853,17 +853,17 @@ class AI3stdpo pattern> + string asm, string cstr, list pattern> : XI { + asm, cstr, pattern> { let Inst{20} = 1; // L bit let Inst{22} = 0; // S bit let Inst{27-25} = 0b100; } class AXI4st pattern> + string asm, string cstr, list pattern> : XI { + asm, cstr, pattern> { let Inst{20} = 0; // L bit let Inst{22} = 0; // S bit let Inst{27-25} = 0b100; @@ -997,9 +997,9 @@ class T1JTI pattern> + string asm, string cstr, list pattern> : Thumb1I; + asm, cstr, pattern>; // Thumb1 instruction that can either be predicated or set CPSR. class Thumb1sI pattern> : Thumb2I; +// Two-address instructions +class T2XIt pattern> + : Thumb2XI; // T2Iidxldst - Thumb2 indexed load / store instructions. class T2Iidxldst opcod, bit load, bit pre, @@ -1311,9 +1315,9 @@ class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, // Load / store multiple class AXDI5 pattern> + string asm, string cstr, list pattern> : VFPXI { + VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; let Inst{11-8} = 0b1011; @@ -1323,9 +1327,9 @@ class AXDI5 pattern> + string asm, string cstr, list pattern> : VFPXI { + VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. 
let Inst{27-25} = 0b110; let Inst{11-8} = 0b1010; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b8b733307cd..a82a5d1e575 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -378,7 +378,7 @@ def am3offset : Operand, def addrmode4 : Operand, ComplexPattern { let PrintMethod = "printAddrMode4Operand"; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm); } // addrmode5 := reg +/- imm8*4 @@ -386,7 +386,7 @@ def addrmode4 : Operand, def addrmode5 : Operand, ComplexPattern { let PrintMethod = "printAddrMode5Operand"; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPR:$base, i32imm); } // addrmode6 := reg with optional writeback @@ -906,10 +906,11 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // FIXME: Should pc be an implicit operand like PICADD, etc? let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in - def LDM_RET : AXI4ld<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb", - []>; + def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, + reglist:$dsts, variable_ops), + LdStMulFrm, IIC_Br, + "ldm${addr:submode}${p}\t$addr, $dsts", + "$addr.addr = $wb", []>; // On non-Darwin platforms R9 is callee-saved. let isCall = 1, @@ -1344,17 +1345,29 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), // Load / store multiple Instructions. 
// -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -def LDM : AXI4ld<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb", - []>; - -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -def STM : AXI4st<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb", - []>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, + reglist:$dsts, variable_ops), LdStMulFrm, IIC_iLoadm, + "ldm${addr:submode}${p}\t$addr, $dsts", "", []>; + +def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, + reglist:$dsts, variable_ops), + LdStMulFrm, IIC_iLoadm, + "ldm${addr:submode}${p}\t$addr, $dsts", + "$addr.addr = $wb", []>; +} // mayLoad, hasExtraDefRegAllocReq + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, + reglist:$srcs, variable_ops), LdStMulFrm, IIC_iStorem, + "stm${addr:submode}${p}\t$addr, $srcs", "", []>; + +def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, + reglist:$srcs, variable_ops), + LdStMulFrm, IIC_iStorem, + "stm${addr:submode}${p}\t$addr, $srcs", + "$addr.addr = $wb", []>; +} // mayStore, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 54910214c75..37c9fc5f734 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -290,8 +290,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // FIXME: remove when we have a way to marking a MI with these properties. 
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in -def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "pop${p}\t$wb", []>, +def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br, + "pop${p}\t$dsts", []>, T1Misc<{1,1,0,?,?,?,?}>; let isCall = 1, @@ -539,28 +539,37 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, // // These requires base address to be written back or one of the loaded regs. -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def tLDM : T1I<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $wb", []>, + "ldm${addr:submode}${p}\t$addr, $dsts", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 +def tLDM_UPD : T1It<(outs tGPR:$wb), + (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), + IIC_iLoadm, + "ldm${addr:submode}${p}\t$addr, $dsts", + "$addr.addr = $wb", []>, + T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 +} // mayLoad, hasExtraDefRegAllocReq + let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -def tSTM : T1I<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $wb", []>, +def tSTM_UPD : T1It<(outs tGPR:$wb), + (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), + IIC_iStorem, + "stm${addr:submode}${p}\t$addr, $srcs", + "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in -def tPOP : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "pop${p}\t$wb", []>, +def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br, + "pop${p}\t$dsts", []>, T1Misc<{1,1,0,?,?,?,?}>; let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in -def tPUSH : 
T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "push${p}\t$wb", []>, +def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br, + "push${p}\t$srcs", []>, T1Misc<{0,1,0,?,?,?,?}>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index f4a74cb59aa..ab9e926099c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1204,29 +1204,56 @@ defm t2PLI : T2Ipl<1, 0, "pli">; // Load / store multiple Instructions. // -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -def t2LDM : T2XI<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, + reglist:$dsts, variable_ops), IIC_iLoadm, + "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' + let Inst{22} = 0; + let Inst{21} = 0; // The W bit. + let Inst{20} = 1; // Load +} + +def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, + reglist:$dsts, variable_ops), IIC_iLoadm, + "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' let Inst{22} = 0; - let Inst{21} = ?; // The W bit. + let Inst{21} = 1; // The W bit. 
let Inst{20} = 1; // Load } +} // mayLoad, hasExtraDefRegAllocReq + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, + reglist:$srcs, variable_ops), IIC_iStorem, + "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' + let Inst{22} = 0; + let Inst{21} = 0; // The W bit. + let Inst{20} = 0; // Store +} -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -def t2STM : T2XI<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { +def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, + reglist:$srcs, variable_ops), + IIC_iStorem, + "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", + "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' let Inst{22} = 0; - let Inst{21} = ?; // The W bit. + let Inst{21} = 1; // The W bit. let Inst{20} = 0; // Store } +} // mayStore, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. @@ -2374,15 +2401,15 @@ let Defs = // FIXME: Should pc be an implicit operand like PICADD, etc? let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in - def t2LDM_RET : T2XI<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_Br, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", - []> { + def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, + reglist:$dsts, variable_ops), IIC_Br, + "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' let Inst{22} = 0; - let Inst{21} = ?; // The W bit. + let Inst{21} = 1; // The W bit. 
let Inst{20} = 1; // Load } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index cad24c4834d..1d35a33fe53 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -77,33 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { -def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $wb", - []> { + "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { let Inst{20} = 1; } -def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $wb", - []> { + "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { + let Inst{20} = 1; +} + +def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, + reglist:$dsts, variable_ops), + IIC_fpLoadm, + "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "$addr.base = $wb", []> { + let Inst{20} = 1; +} + +def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, + reglist:$dsts, variable_ops), + IIC_fpLoadm, + "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "$addr.base = $wb", []> { let Inst{20} = 1; } } // mayLoad, hasExtraDefRegAllocReq let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { -def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $wb", - []> { + "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { let Inst{20} = 0; } -def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_fpStorem, - 
"vstm${addr:submode}${p}\t${addr:base}, $wb", - []> { + "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { + let Inst{20} = 0; +} + +def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, + reglist:$srcs, variable_ops), + IIC_fpStorem, + "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "$addr.base = $wb", []> { + let Inst{20} = 0; +} + +def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, + reglist:$srcs, variable_ops), + IIC_fpStorem, + "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "$addr.base = $wb", []> { let Inst{20} = 0; } } // mayStore, hasExtraSrcRegAllocReq diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 808999c3b35..77dd4667615 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -254,7 +254,6 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, .addReg(Base, getKillRegState(BaseKill)) .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) .addImm(Pred).addReg(PredReg); - MIB.addReg(0); // Add optional writeback (0 for now). 
for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -443,7 +442,7 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STM: case ARM::t2LDM: case ARM::t2STM: - return (MI->getNumOperands() - 5) * 4; + return (MI->getNumOperands() - 4) * 4; case ARM::VLDMS: case ARM::VSTMS: case ARM::VLDMD: @@ -452,6 +451,21 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { } } +static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) { + switch (Opc) { + case ARM::LDM: return ARM::LDM_UPD; + case ARM::STM: return ARM::STM_UPD; + case ARM::t2LDM: return ARM::t2LDM_UPD; + case ARM::t2STM: return ARM::t2STM_UPD; + case ARM::VLDMS: return ARM::VLDMS_UPD; + case ARM::VLDMD: return ARM::VLDMD_UPD; + case ARM::VSTMS: return ARM::VSTMS_UPD; + case ARM::VSTMD: return ARM::VSTMD_UPD; + default: llvm_unreachable("Unhandled opcode!"); + } + return 0; +} + /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// @@ -470,117 +484,119 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) { MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); + bool BaseKill = MI->getOperand(0).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); + DebugLoc dl = MI->getDebugLoc(); bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM || Opcode == ARM::STM || Opcode == ARM::t2STM); - if (isAM4) { - if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())) - return false; + bool DoMerge = false; + ARM_AM::AMSubMode Mode = ARM_AM::ia; + unsigned Offset = 0; - // Can't use the updating AM4 sub-mode if the base register is also a dest + if (isAM4) { + // Can't use an updating ld/st if the base register is also a dest 
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).getReg() == Base) return false; } + assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())); + Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); + } else { + // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. + assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())); + Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); + Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); + } - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); - if (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + // Try merging with the previous instruction. + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (isAM4) { if (Mode == ARM_AM::ia && isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true)); - MI->getOperand(4).setReg(Base); - MI->getOperand(4).setIsDef(); - MBB.erase(PrevMBBI); - return true; - } else if (Mode == ARM_AM::ib && + DoMerge = true; + Mode = ARM_AM::db; + } else if (isAM4 && Mode == ARM_AM::ib && isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true)); - MI->getOperand(4).setReg(Base); // WB to base - MI->getOperand(4).setIsDef(); - MBB.erase(PrevMBBI); - return true; + DoMerge = true; + Mode = ARM_AM::da; + } + } else { + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { + Mode = ARM_AM::db; + DoMerge = true; } } + if (DoMerge) + MBB.erase(PrevMBBI); + } - if (MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + // Try merging with the next instruction. 
+ if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + if (isAM4) { if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); - MI->getOperand(4).setReg(Base); // WB to base - MI->getOperand(4).setIsDef(); - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - return true; + DoMerge = true; } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); - MI->getOperand(4).setReg(Base); // WB to base - MI->getOperand(4).setIsDef(); - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - return true; + DoMerge = true; } - } - } else { - // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) - return false; - - ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); - unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); - if (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + } else { if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset)); - MI->getOperand(4).setReg(Base); // WB to base - MI->getOperand(4).setIsDef(); - MBB.erase(PrevMBBI); - return true; + isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { + DoMerge = true; } } - - if (MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); - if (Mode == ARM_AM::ia && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); - MI->getOperand(4).setReg(Base); // WB to base - MI->getOperand(4).setIsDef(); - if (NextMBBI == I) { - Advance = true; - ++I; - } - 
MBB.erase(NextMBBI); + if (DoMerge) { + if (NextMBBI == I) { + Advance = true; + ++I; } - return true; + MBB.erase(NextMBBI); } } - return false; + if (!DoMerge) + return false; + + unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + .addReg(Base, getDefRegState(true)) // WB base register + .addReg(Base, getKillRegState(BaseKill)); + if (isAM4) { + // [t2]LDM_UPD, [t2]STM_UPD + MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true)) + .addImm(Pred).addReg(PredReg); + } else { + // VLDM[SD}_UPD, VSTM[SD]_UPD + MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset)) + .addImm(Pred).addReg(PredReg); + } + // Transfer the rest of operands. + for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) + MIB.addOperand(MI->getOperand(OpNum)); + // Transfer memoperands. + (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + MBB.erase(MBBI); + return true; } static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_PRE; case ARM::STR: return ARM::STR_PRE; - case ARM::VLDRS: return ARM::VLDMS; - case ARM::VLDRD: return ARM::VLDMD; - case ARM::VSTRS: return ARM::VSTMS; - case ARM::VSTRD: return ARM::VSTMD; + case ARM::VLDRS: return ARM::VLDMS_UPD; + case ARM::VLDRD: return ARM::VLDMD_UPD; + case ARM::VSTRS: return ARM::VSTMS_UPD; + case ARM::VSTRD: return ARM::VSTMD_UPD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_PRE; @@ -596,10 +612,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_POST; case ARM::STR: return ARM::STR_POST; - case ARM::VLDRS: return ARM::VLDMS; - case ARM::VLDRD: return ARM::VLDMD; - case ARM::VSTRS: return ARM::VSTMS; - case ARM::VSTRD: return ARM::VSTMD; + case ARM::VLDRS: return ARM::VLDMS_UPD; + case ARM::VLDRD: return ARM::VLDMD_UPD; + case ARM::VSTRS: return ARM::VSTMS_UPD; + case ARM::VSTRD: return ARM::VSTMD_UPD; case ARM::t2LDRi8: case 
ARM::t2LDRi12: return ARM::t2LDR_POST; @@ -699,13 +715,13 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; if (isAM5) { - // VLDM[SD}, VSTM[SD] + // VLDM[SD}_UPD, VSTM[SD]_UPD MachineOperand &MO = MI->getOperand(0); BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(isLd ? BaseKill : false)) .addImm(Offset) .addImm(Pred).addReg(PredReg) - .addReg(Base, getDefRegState(true)) // WB base register .addReg(MO.getReg(), (isLd ? getDefRegState(true) : getKillRegState(MO.isKill()))); } else if (isLd) { @@ -909,7 +925,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, .addReg(BaseReg, getKillRegState(BaseKill)) .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) - .addReg(0) .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); ++NumLDRD2LDM; @@ -918,7 +933,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, .addReg(BaseReg, getKillRegState(BaseKill)) .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) - .addReg(0) .addReg(EvenReg, getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) .addReg(OddReg, @@ -1156,7 +1170,8 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBBI != MBB.begin() && (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) { MachineInstr *PrevMI = prior(MBBI); - if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) { + if (PrevMI->getOpcode() == ARM::LDM_UPD || + PrevMI->getOpcode() == ARM::t2LDM_UPD) { MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); if (MO.getReg() != ARM::LR) return false; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index a4443045808..bb4a33012bf 100644 --- 
a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -522,8 +522,10 @@ void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, if (MO1.getReg() == ARM::SP) { // FIXME bool isLDM = (MI->getOpcode() == ARM::LDM || + MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::LDM_RET || MI->getOpcode() == ARM::t2LDM || + MI->getOpcode() == ARM::t2LDM_UPD || MI->getOpcode() == ARM::t2LDM_RET); O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); } else @@ -816,11 +818,10 @@ void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) { void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) { O << "{"; - // Always skip the first operand, it's the optional (and implicit writeback). - for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isImplicit()) continue; - if ((int)i != OpNum+1) O << ", "; + if ((int)i != OpNum) O << ", "; printOperand(MI, i); } O << "}"; diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 7f42c82abe9..29ae631269a 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -159,7 +159,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); AddDefaultPred(MIB); - MIB.addReg(0); // No write back. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); // Add the callee-saved register as live-in. It's killed at the spill. @@ -182,7 +181,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP)); AddDefaultPred(MIB); - MIB.addReg(0); // No write back. 
bool NumRegs = false; for (unsigned i = CSI.size(); i != 0; --i) { diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 49fd3fa05be..99c38b1f4d4 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -791,9 +791,9 @@ static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) return true; else if (MI->getOpcode() == ARM::tPOP) { - // The first three operands are predicates and such. The last two are + // The first two operands are predicates. The last two are // imp-def and imp-use of SP. Check everything in between. - for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i) + for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i) if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) return false; return true; @@ -864,7 +864,6 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(0) // No write back. 
.addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 5086eff59df..60d7eae8ad7 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -121,9 +121,11 @@ namespace { { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 }, { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDM, ARM::tLDM, 0, 0, 0, 1, 1, 1,1, 1 }, { ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2LDM, ARM::tLDM, ARM::tPOP, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2LDM_UPD,ARM::tLDM_UPD,ARM::tPOP, 0, 0, 1, 1, 1,1, 1 }, + // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent + { ARM::t2STM_UPD,ARM::tSTM_UPD,ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, }; class Thumb2SizeReduce : public MachineFunctionPass { @@ -231,8 +233,9 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, static bool VerifyLowRegs(MachineInstr *MI) { unsigned Opc = MI->getOpcode(); - bool isPCOk = (Opc == ARM::t2LDM_RET) || (Opc == ARM::t2LDM); - bool isLROk = (Opc == ARM::t2STM); + bool isPCOk = (Opc == ARM::t2LDM_RET || Opc == ARM::t2LDM || + Opc == ARM::t2LDM_UPD); + bool isLROk = (Opc == ARM::t2STM_UPD); bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -307,19 +310,35 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, HasShift = true; OpNum = 4; break; - case ARM::t2LDM_RET: - case ARM::t2LDM: - case ARM::t2STM: { - OpNum = 0; + case ARM::t2LDM: { unsigned BaseReg = MI->getOperand(0).getReg(); - unsigned Mode = MI->getOperand(1).getImm(); - if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) { - Opc = Entry.NarrowOpc2; - OpNum = 2; - } else if (Entry.WideOpc == ARM::t2LDM_RET || - 
!isARMLowRegister(BaseReg) || - !ARM_AM::getAM4WBFlag(Mode) || - ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); + if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) + return false; + OpNum = 0; + isLdStMul = true; + break; + } + case ARM::t2LDM_RET: { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg != ARM::SP) + return false; + Opc = Entry.NarrowOpc2; // tPOP_RET + OpNum = 3; + isLdStMul = true; + break; + } + case ARM::t2LDM_UPD: + case ARM::t2STM_UPD: { + OpNum = 0; + unsigned BaseReg = MI->getOperand(1).getReg(); + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(2).getImm()); + if (BaseReg == ARM::SP && + (Entry.WideOpc == ARM::t2LDM_UPD && Mode == ARM_AM::ia) || + (Entry.WideOpc == ARM::t2STM_UPD && Mode == ARM_AM::db)) { + Opc = Entry.NarrowOpc2; // tPOP or tPUSH + OpNum = 3; + } else if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) { return false; } isLdStMul = true; -- 2.34.1