From e4fa341dde3c9521b7f11bd53ecdcbeb3f8fcbda Mon Sep 17 00:00:00 2001
From: Gerolf Hoflehner
Date: Thu, 7 Aug 2014 21:40:58 +0000
Subject: [PATCH] MachineCombiner Pass for selecting faster instruction
 sequence on AArch64

Re-commit of r214832,r21469 with a work-around that
avoids the previous problem with gcc build compilers

The work-around is to use SmallVector instead of ArrayRef
of basic blocks in preservesResourceLen()/MachineCombiner.cpp

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215151 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineCombiner.cpp               |   4 +-
 lib/Target/AArch64/AArch64InstrFormats.td     |   5 +-
 lib/Target/AArch64/AArch64InstrInfo.cpp       | 476 +++++++++++++++++-
 lib/Target/AArch64/AArch64InstrInfo.h         |  20 +-
 .../AArch64/AArch64MachineCombinerPattern.h   |  42 ++
 lib/Target/AArch64/AArch64TargetMachine.cpp   |   6 +
 test/CodeGen/AArch64/madd-lohi.ll             |  19 +
 test/CodeGen/AArch64/mul-lohi.ll              |  13 +-
 8 files changed, 564 insertions(+), 21 deletions(-)
 create mode 100644 lib/Target/AArch64/AArch64MachineCombinerPattern.h
 create mode 100644 test/CodeGen/AArch64/madd-lohi.ll

diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 7cf3506c0a5..032b4ee5cbb 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -273,7 +273,9 @@ bool MachineCombiner::preservesResourceLen(

   // Compute current resource length
-  ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+  //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+  SmallVector<const MachineBasicBlock *, 1> MBBarr;
+  MBBarr.push_back(MBB);
   unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);

   // Deal with SC rather than Instructions.
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 106f2158909..3f764239290 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1351,14 +1351,15 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
 }

 multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
+  // MADD/MSUB generation is decided by MachineCombiner.cpp
   def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, asm,
-                 [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
+                 [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>,
              Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
     let Inst{31} = 0;
   }

   def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, asm,
-                 [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
+                 [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>,
              Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
     let Inst{31} = 1;
   }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 48235186176..e9fecbf81d7 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -14,6 +14,7 @@
 #include "AArch64InstrInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "AArch64MachineCombinerPattern.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -697,17 +698,12 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
   return true;
 }

-/// optimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register.
-bool AArch64InstrInfo::optimizeCompareInstr(
-    MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
-    int CmpValue, const MachineRegisterInfo *MRI) const {
-
-  // Replace SUBSWrr with SUBWrr if NZCV is not used.
-  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
-  if (Cmp_NZCV != -1) {
+/// convertFlagSettingOpcode - return the opcode that does not
+/// set flags when possible. The caller is responsible for doing
+/// the actual substitution and legality checking.
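+///
+/// Illustrative use only (names as in optimizeCompareInstr below); the
+/// helper itself does not modify the instruction:
+///   unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
+///   if (NewOpc != CmpInstr->getOpcode())
+///     CmpInstr->setDesc(TII->get(NewOpc)); // caller also drops the NZCV def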
+static unsigned convertFlagSettingOpcode(MachineInstr *MI) {
   unsigned NewOpc;
-  switch (CmpInstr->getOpcode()) {
+  switch (MI->getOpcode()) {
   default:
     return false;
   case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
@@ -727,7 +723,22 @@ bool AArch64InstrInfo::optimizeCompareInstr(
   case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
   case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
   }
+  return NewOpc;
+}
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
+bool AArch64InstrInfo::optimizeCompareInstr(
+    MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+    int CmpValue, const MachineRegisterInfo *MRI) const {
+
+  // Replace SUBSWrr with SUBWrr if NZCV is not used.
+  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
+  if (Cmp_NZCV != -1) {
+    unsigned Opc = CmpInstr->getOpcode();
+    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
+    if (NewOpc == Opc)
+      return false;
     const MCInstrDesc &MCID = get(NewOpc);
     CmpInstr->setDesc(MCID);
     CmpInstr->RemoveOperand(Cmp_NZCV);
@@ -2185,3 +2196,448 @@ void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
   NopInst.setOpcode(AArch64::HINT);
   NopInst.addOperand(MCOperand::CreateImm(0));
 }
+/// useMachineCombiner - return true when a target supports MachineCombiner
+bool AArch64InstrInfo::useMachineCombiner(void) const {
+  // AArch64 supports the combiner
+  return true;
+}
+//
+// True when Opc sets flags
+static bool isCombineInstrSettingFlag(unsigned Opc) {
+  switch (Opc) {
+  case AArch64::ADDSWrr:
+  case AArch64::ADDSWri:
+  case AArch64::ADDSXrr:
+  case AArch64::ADDSXri:
+  case AArch64::SUBSWrr:
+  case AArch64::SUBSXrr:
+  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd = WnxWm - Wi.
+  case AArch64::SUBSWri:
+  case AArch64::SUBSXri:
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+//
+// 32b Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate32(unsigned Opc) {
+  switch (Opc) {
+  case AArch64::ADDWrr:
+  case AArch64::ADDWri:
+  case AArch64::SUBWrr:
+  case AArch64::ADDSWrr:
+  case AArch64::ADDSWri:
+  case AArch64::SUBSWrr:
+  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd = WnxWm - Wi.
+  case AArch64::SUBWri:
+  case AArch64::SUBSWri:
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+//
+// 64b Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate64(unsigned Opc) {
+  switch (Opc) {
+  case AArch64::ADDXrr:
+  case AArch64::ADDXri:
+  case AArch64::SUBXrr:
+  case AArch64::ADDSXrr:
+  case AArch64::ADDSXri:
+  case AArch64::SUBSXrr:
+  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd = WnxWm - Wi.
+  case AArch64::SUBXri:
+  case AArch64::SUBSXri:
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+//
+// Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate(unsigned Opc) {
+  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
+}
+
+static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
+                              unsigned MulOpc, unsigned ZeroReg) {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  MachineInstr *MI = nullptr;
+  // We need a virtual register definition.
+  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+    MI = MRI.getUniqueVRegDef(MO.getReg());
+  // And it needs to be in the trace (otherwise, it won't have a depth).
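+  // For illustration (virtual register numbers are hypothetical), the shape
+  // this helper accepts for MulOpc = MADDWrrr, ZeroReg = WZR is:
+  //   %vreg2 = MADDWrrr %vreg0, %vreg1, %WZR  ; a plain MUL: %vreg0 * %vreg1
+  //   %vreg3 = ADDWrr %vreg2, %vreg4          ; MO is this use of %vreg2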
+  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
+    return false;
+
+  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
+         MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+         MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
+
+  // The third input reg must be zero.
+  if (MI->getOperand(3).getReg() != ZeroReg)
+    return false;
+
+  // Must only be used by the user we combine with.
+  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+    return false;
+
+  return true;
+}
+
+/// hasPattern - return true when there is potentially a faster code sequence
+/// for an instruction chain ending in \p Root. All potential patterns are
+/// listed in the \p Pattern vector. Pattern should be sorted in priority order
+/// since the pattern evaluator stops checking as soon as it finds a faster
+/// sequence.
+
+bool AArch64InstrInfo::hasPattern(
+    MachineInstr &Root,
+    SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
+  unsigned Opc = Root.getOpcode();
+  MachineBasicBlock &MBB = *Root.getParent();
+  bool Found = false;
+
+  if (!isCombineInstrCandidate(Opc))
+    return false;
+  if (isCombineInstrSettingFlag(Opc)) {
+    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
+    // When NZCV is live bail out.
+    if (Cmp_NZCV == -1)
+      return false;
+    unsigned NewOpc = convertFlagSettingOpcode(&Root);
+    // When the opcode can't change bail out.
+    // CHECKME: do we miss any cases for opcode conversion?
+    if (NewOpc == Opc)
+      return false;
+    Opc = NewOpc;
+  }
+
+  switch (Opc) {
+  default:
+    break;
+  case AArch64::ADDWrr:
+    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
+           "ADDWrr does not have register operands");
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+                          AArch64::WZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
+      Found = true;
+    }
+    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
+                          AArch64::WZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
+      Found = true;
+    }
+    break;
+  case AArch64::ADDXrr:
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+                          AArch64::XZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
+      Found = true;
+    }
+    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
+                          AArch64::XZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
+      Found = true;
+    }
+    break;
+  case AArch64::SUBWrr:
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+                          AArch64::WZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
+      Found = true;
+    }
+    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
+                          AArch64::WZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
+      Found = true;
+    }
+    break;
+  case AArch64::SUBXrr:
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+                          AArch64::XZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
+      Found = true;
+    }
+    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
+                          AArch64::XZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
+      Found = true;
+    }
+    break;
+  case AArch64::ADDWri:
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+                          AArch64::WZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
+      Found = true;
+    }
+    break;
+  case AArch64::ADDXri:
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+                          AArch64::XZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
+      Found = true;
+    }
+    break;
+  case AArch64::SUBWri:
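+    // Conceptually (registers are illustrative): 'sub w0, w1, #imm' where w1
+    // is fed by a multiply becomes 'orr wV, wzr, #-imm' + 'madd w0, wa, wb, wV',
+    // handled as MC_MULSUBWI_OP1 in genAlternativeCodeSequence().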
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+                          AArch64::WZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
+      Found = true;
+    }
+    break;
+  case AArch64::SUBXri:
+    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+                          AArch64::XZR)) {
+      Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
+      Found = true;
+    }
+    break;
+  }
+  return Found;
+}
+
+/// genMadd - Generate madd instruction and combine mul and add.
+/// Example:
+///  MUL I=A,B,0
+///  ADD R,I,C
+///  ==> MADD R,A,B,C
+/// \param Root is the ADD instruction
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated madd instruction
+/// \param IdxMulOpd is the index of the operand in Root that is the result
+/// of the MUL. In the example above IdxMulOpd is 1.
+/// \param MaddOpc the opcode of the madd instruction
+static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
+                             const TargetInstrInfo *TII, MachineInstr &Root,
+                             SmallVectorImpl<MachineInstr *> &InsInstrs,
+                             unsigned IdxMulOpd, unsigned MaddOpc) {
+  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
+
+  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
+  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
+  MachineOperand R = Root.getOperand(0);
+  MachineOperand A = MUL->getOperand(1);
+  MachineOperand B = MUL->getOperand(2);
+  MachineOperand C = Root.getOperand(IdxOtherOpd);
+  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc))
+                                .addOperand(R)
+                                .addOperand(A)
+                                .addOperand(B)
+                                .addOperand(C);
+  // Insert the MADD
+  InsInstrs.push_back(MIB);
+  return MUL;
+}
+
+/// genMaddR - Generate madd instruction and combine mul and add using
+/// an extra virtual register
+/// Example - an ADD intermediate needs to be stored in a register:
+///   MUL I=A,B,0
+///   ADD R,I,Imm
+///   ==> ORR V, ZR, Imm
+///   ==> MADD R,A,B,V
+/// \param Root is the ADD instruction
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated madd instruction
+/// \param IdxMulOpd is the index of the operand in Root that is the result
+/// of the MUL. In the example above IdxMulOpd is 1.
+/// \param MaddOpc the opcode of the madd instruction
+/// \param VR is a virtual register that holds the value of an ADD operand
+/// (V in the example above).
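+///
+/// For example (virtual registers are illustrative), MC_MULADDWI_OP1 turns
+///   %I = MADDWrrr %A, %B, %WZR   ; plain multiply
+///   %R = ADDWri %I, 7
+/// into
+///   %V = ORRWri %WZR, enc(7)     ; materialize the immediate
+///   %R = MADDWrrr %A, %B, %V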
+static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
+                              const TargetInstrInfo *TII, MachineInstr &Root,
+                              SmallVectorImpl<MachineInstr *> &InsInstrs,
+                              unsigned IdxMulOpd, unsigned MaddOpc,
+                              unsigned VR) {
+  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
+
+  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
+  MachineOperand R = Root.getOperand(0);
+  MachineOperand A = MUL->getOperand(1);
+  MachineOperand B = MUL->getOperand(2);
+  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc))
+                                .addOperand(R)
+                                .addOperand(A)
+                                .addOperand(B)
+                                .addReg(VR);
+  // Insert the MADD
+  InsInstrs.push_back(MIB);
+  return MUL;
+}
+/// genAlternativeCodeSequence - when hasPattern() finds a pattern,
+/// this function generates the instructions that could replace the
+/// original code sequence
+void AArch64InstrInfo::genAlternativeCodeSequence(
+    MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+    SmallVectorImpl<MachineInstr *> &InsInstrs,
+    SmallVectorImpl<MachineInstr *> &DelInstrs,
+    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+  MachineBasicBlock &MBB = *Root.getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo *TII =
+      MF.getTarget().getSubtargetImpl()->getInstrInfo();
+
+  MachineInstr *MUL = nullptr;
+  unsigned Opc;
+  switch (Pattern) {
+  default:
+    // signal error.
+    break;
+  case MachineCombinerPattern::MC_MULADDW_OP1:
+  case MachineCombinerPattern::MC_MULADDX_OP1:
+    // MUL I=A,B,0
+    // ADD R,I,C
+    // ==> MADD R,A,B,C
+    // --- Create(MADD);
+    Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP1 ? AArch64::MADDWrrr
+                                                            : AArch64::MADDXrrr;
+    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc);
+    break;
+  case MachineCombinerPattern::MC_MULADDW_OP2:
+  case MachineCombinerPattern::MC_MULADDX_OP2:
+    // MUL I=A,B,0
+    // ADD R,C,I
+    // ==> MADD R,A,B,C
+    // --- Create(MADD);
+    Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP2 ? AArch64::MADDWrrr
+                                                            : AArch64::MADDXrrr;
+    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
+    break;
+  case MachineCombinerPattern::MC_MULADDWI_OP1:
+  case MachineCombinerPattern::MC_MULADDXI_OP1:
+    // MUL I=A,B,0
+    // ADD R,I,Imm
+    // ==> ORR V, ZR, Imm
+    // ==> MADD R,A,B,V
+    // --- Create(MADD);
+    {
+      const TargetRegisterClass *RC =
+          MRI.getRegClass(Root.getOperand(1).getReg());
+      unsigned NewVR = MRI.createVirtualRegister(RC);
+      unsigned BitSize, OrrOpc, ZeroReg;
+      if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
+        BitSize = 32;
+        OrrOpc = AArch64::ORRWri;
+        ZeroReg = AArch64::WZR;
+        Opc = AArch64::MADDWrrr;
+      } else {
+        OrrOpc = AArch64::ORRXri;
+        BitSize = 64;
+        ZeroReg = AArch64::XZR;
+        Opc = AArch64::MADDXrrr;
+      }
+      uint64_t Imm = Root.getOperand(2).getImm();
+
+      if (Root.getOperand(3).isImm()) {
+        unsigned Val = Root.getOperand(3).getImm();
+        Imm = Imm << Val;
+      }
+      uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+      uint64_t Encoding;
+
+      if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+        MachineInstrBuilder MIB1 =
+            BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc))
+                .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define))
+                .addReg(ZeroReg)
+                .addImm(Encoding);
+        InsInstrs.push_back(MIB1);
+        InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+        MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
+      }
+    }
+    break;
+  case MachineCombinerPattern::MC_MULSUBW_OP1:
+  case MachineCombinerPattern::MC_MULSUBX_OP1: {
+    // MUL I=A,B,0
+    // SUB R,I,C
+    // ==> SUB V, 0, C
+    // ==> MADD R,A,B,V // = -C + A*B
+    // --- Create(MADD);
+    const TargetRegisterClass *RC =
+        MRI.getRegClass(Root.getOperand(1).getReg());
+    unsigned NewVR = MRI.createVirtualRegister(RC);
+    unsigned SubOpc, ZeroReg;
+    if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
+      SubOpc = AArch64::SUBWrr;
+      ZeroReg = AArch64::WZR;
+      Opc = AArch64::MADDWrrr;
+    } else {
+      SubOpc = AArch64::SUBXrr;
+      ZeroReg = AArch64::XZR;
+      Opc = AArch64::MADDXrrr;
+    }
+    // SUB NewVR, 0, C
+    MachineInstrBuilder MIB1 =
+        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc))
+            .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define))
+            .addReg(ZeroReg)
+            .addOperand(Root.getOperand(2));
+    InsInstrs.push_back(MIB1);
+    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
+  } break;
+  case MachineCombinerPattern::MC_MULSUBW_OP2:
+  case MachineCombinerPattern::MC_MULSUBX_OP2:
+    // MUL I=A,B,0
+    // SUB R,C,I
+    // ==> MSUB R,A,B,C (computes C - A*B)
+    // --- Create(MSUB);
+    Opc = Pattern == MachineCombinerPattern::MC_MULSUBW_OP2 ? AArch64::MSUBWrrr
+                                                            : AArch64::MSUBXrrr;
+    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
+    break;
+  case MachineCombinerPattern::MC_MULSUBWI_OP1:
+  case MachineCombinerPattern::MC_MULSUBXI_OP1: {
+    // MUL I=A,B,0
+    // SUB R,I,Imm
+    // ==> ORR V, ZR, -Imm
+    // ==> MADD R,A,B,V // = -Imm + A*B
+    // --- Create(MADD);
+    const TargetRegisterClass *RC =
+        MRI.getRegClass(Root.getOperand(1).getReg());
+    unsigned NewVR = MRI.createVirtualRegister(RC);
+    unsigned BitSize, OrrOpc, ZeroReg;
+    if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
+      BitSize = 32;
+      OrrOpc = AArch64::ORRWri;
+      ZeroReg = AArch64::WZR;
+      Opc = AArch64::MADDWrrr;
+    } else {
+      OrrOpc = AArch64::ORRXri;
+      BitSize = 64;
+      ZeroReg = AArch64::XZR;
+      Opc = AArch64::MADDXrrr;
+    }
+    int Imm = Root.getOperand(2).getImm();
+    if (Root.getOperand(3).isImm()) {
+      unsigned Val = Root.getOperand(3).getImm();
+      Imm = Imm << Val;
+    }
+    uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
+    uint64_t Encoding;
+    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+      MachineInstrBuilder MIB1 =
+          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc))
+              .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define))
+              .addReg(ZeroReg)
+              .addImm(Encoding);
+      InsInstrs.push_back(MIB1);
+      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
+    }
+  } break;
+  }
+  // Record MUL and ADD/SUB for deletion. When no replacement was generated
+  // (e.g. the immediate was not encodable), leave the original code alone.
+  if (MUL) {
+    DelInstrs.push_back(MUL);
+    DelInstrs.push_back(&Root);
+  }
+
+  return;
+}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index d465fb757a0..87e78490c9d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -17,6 +17,7 @@
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"

 #define GET_INSTRINFO_HEADER
 #include "AArch64GenInstrInfo.inc"
@@ -156,9 +157,26 @@ public:
   bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
                             unsigned SrcReg2, int CmpMask, int CmpValue,
                             const MachineRegisterInfo *MRI) const override;
+  /// hasPattern - return true when there is potentially a faster code sequence
+  /// for an instruction chain ending in <Root>. All potential patterns are
+  /// listed in the <Pattern> array.
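+  /// For instance (virtual registers are illustrative), given
+  ///   %I = MADDWrrr %A, %B, %WZR  ; a plain multiply
+  ///   %R = ADDWrr %I, %C
+  /// this pushes MC_MULADDW_OP1, proposing a single 'MADD %R, %A, %B, %C'.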
+  virtual bool hasPattern(
+      MachineInstr &Root,
+      SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const;
+
+  /// genAlternativeCodeSequence - when hasPattern() finds a pattern,
+  /// this function generates the instructions that could replace the
+  /// original code sequence
+  virtual void genAlternativeCodeSequence(
+      MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+      SmallVectorImpl<MachineInstr *> &InsInstrs,
+      SmallVectorImpl<MachineInstr *> &DelInstrs,
+      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
+  /// useMachineCombiner - AArch64 supports MachineCombiner
+  virtual bool useMachineCombiner(void) const;
+
   bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
-
 private:
   void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
                              MachineBasicBlock *TBB,
diff --git a/lib/Target/AArch64/AArch64MachineCombinerPattern.h b/lib/Target/AArch64/AArch64MachineCombinerPattern.h
new file mode 100644
index 00000000000..568ce897360
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineCombinerPattern.h
@@ -0,0 +1,42 @@
+//===- AArch64MachineCombinerPattern.h                                -===//
+//===- AArch64 instruction pattern supported by combiner              -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the instruction patterns supported by the machine combiner
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H
+#define LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H
+
+namespace llvm {
+
+/// Enumeration of the instruction patterns supported by the machine combiner
+///
+///
+namespace MachineCombinerPattern {
+enum MC_PATTERN : int {
+  MC_NONE = 0,
+  MC_MULADDW_OP1 = 1,
+  MC_MULADDW_OP2 = 2,
+  MC_MULSUBW_OP1 = 3,
+  MC_MULSUBW_OP2 = 4,
+  MC_MULADDWI_OP1 = 5,
+  MC_MULSUBWI_OP1 = 6,
+  MC_MULADDX_OP1 = 7,
+  MC_MULADDX_OP2 = 8,
+  MC_MULSUBX_OP1 = 9,
+  MC_MULSUBX_OP2 = 10,
+  MC_MULADDXI_OP1 = 11,
+  MC_MULSUBXI_OP1 = 12
+};
+} // end namespace MachineCombinerPattern
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 561bb24c6ab..7d7070c8c6e 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -24,6 +24,10 @@ static cl::opt<bool>
 EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
            cl::init(true), cl::Hidden);

+static cl::opt<bool> EnableMCR("aarch64-mcr",
+                               cl::desc("Enable the machine combiner pass"),
+                               cl::init(true), cl::Hidden);
+
 static cl::opt<bool>
 EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
                      cl::init(true), cl::Hidden);
@@ -180,6 +184,8 @@ bool AArch64PassConfig::addInstSelector() {
 bool AArch64PassConfig::addILPOpts() {
   if (EnableCCMP)
     addPass(createAArch64ConditionalCompares());
+  if (EnableMCR)
+    addPass(&MachineCombinerID);
   if (EnableEarlyIfConversion)
     addPass(&EarlyIfConverterID);
   if (EnableStPairSuppress)
diff --git a/test/CodeGen/AArch64/madd-lohi.ll b/test/CodeGen/AArch64/madd-lohi.ll
new file mode 100644
index 00000000000..550a8cb24b7
--- /dev/null
+++ b/test/CodeGen/AArch64/madd-lohi.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
+
+define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: test_128bitmul:
+; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
+; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
+; CHECK: madd x1, x1, x2, [[PART1]]
+; CHECK: mul x0, x0, x2
+
+; CHECK-BE-LABEL: test_128bitmul:
+; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
+; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
+; CHECK-BE: madd x0, x0, x3, [[PART1]]
+; CHECK-BE: mul x1, x1, x3
+
+  %prod = mul i128 %lhs, %rhs
+  ret i128 %prod
+}
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll
index 550a8cb24b7..4515697b999 100644
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,17 +1,16 @@
-; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
-
+; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s
 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:
+; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
 ; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
-; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
-; CHECK: madd x1, x1, x2, [[PART1]]
+; CHECK: mul [[PART2:x[0-9]+]], x1, x2
 ; CHECK: mul x0, x0, x2

 ; CHECK-BE-LABEL: test_128bitmul:
+; CHECK-BE-DAG: mul [[PART1:x[0-9]+]], x1, x2
 ; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
-; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
-; CHECK-BE: madd x0, x0, x3, [[PART1]]
+; CHECK-BE: mul [[PART2:x[0-9]+]], x0, x3
 ; CHECK-BE: mul x1, x1, x3

   %prod = mul i128 %lhs, %rhs
-- 
2.34.1