X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMBaseInstrInfo.cpp;h=abb1fdad080f43b933e71160d68c4248112d08e2;hb=4d6cb933eb23e4dc167b04a981a8434da2ab13c5;hp=5283d7b362371465078c54a8be1fcb63179497f9;hpb=0187e7a9ba5c50b4559e0c2e0afceb6d5cd32190;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5283d7b3623..abb1fdad080 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "ARMBaseInstrInfo.h" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMFeatures.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" @@ -27,20 +28,25 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" - -#define GET_INSTRINFO_CTOR -#include "ARMGenInstrInfo.inc" +#include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "arm-instrinfo" + +#define GET_INSTRINFO_CTOR_DTOR +#include "ARMGenInstrInfo.inc" + static cl::opt EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); @@ -99,14 +105,15 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl // currently defaults to no prepass hazard recognizer. -ScheduleHazardRecognizer *ARMBaseInstrInfo:: -CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const { +ScheduleHazardRecognizer * +ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const { if (usePreRAHazardRecognizer()) { - const InstrItineraryData *II = TM->getInstrItineraryData(); + const InstrItineraryData *II = + static_cast(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } - return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } ScheduleHazardRecognizer *ARMBaseInstrInfo:: @@ -124,14 +131,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // FIXME: Thumb2 support. if (!EnableARM3Addr) - return NULL; + return nullptr; MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { - default: return NULL; + default: return nullptr; case ARMII::IndexModePre: isPre = true; break; @@ -143,10 +150,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // operation. unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); if (MemOpc == 0) - return NULL; + return nullptr; - MachineInstr *UpdateMI = NULL; - MachineInstr *MemMI = NULL; + MachineInstr *UpdateMI = nullptr; + MachineInstr *MemMI = nullptr; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); @@ -168,7 +175,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ARM_AM::getSOImmVal(Amt) == -1) // Can't encode it in a so_imm operand. This transformation will // add more than 1 instruction. Abandon! - return NULL; + return nullptr; UpdateMI = BuildMI(MF, MI->getDebugLoc(), get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg).addImm(Amt) @@ -272,116 +279,98 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. + TBB = nullptr; + FBB = nullptr; + MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) - return false; + return false; // Empty blocks are easy. --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - // Get the last instruction in the block. - MachineInstr *LastInst = I; - unsigned LastOpc = LastInst->getOpcode(); + // Walk backwards from the end of the basic block until the branch is + // analyzed or we give up. + while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) { - // Check if it's an indirect branch first, this should return 'unanalyzable' - // even if it's predicated. - if (isIndirectBranchOpcode(LastOpc)) - return true; + // Flag to be raised on unanalyzeable instructions. This is useful in cases + // where we want to clean up on the end of the basic block before we bail + // out. + bool CantAnalyze = false; - if (!isUnpredicatedTerminator(I)) - return false; - - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; + // Skip over DEBUG values and predicated nonterminators. + while (I->isDebugValue() || !I->isTerminator()) { + if (I == MBB.begin()) + return false; + --I; } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; + + if (isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode())) { + // Indirect branches and jump tables can't be analyzed, but we still want + // to clean up any instructions at the tail of the basic block. + CantAnalyze = true; + } else if (isUncondBranchOpcode(I->getOpcode())) { + TBB = I->getOperand(0).getMBB(); + } else if (isCondBranchOpcode(I->getOpcode())) { + // Bail out if we encounter multiple conditional branches. + if (!Cond.empty()) + return true; + + assert(!FBB && "FBB should have been null."); + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(I->getOperand(1)); + Cond.push_back(I->getOperand(2)); + } else if (I->isReturn()) { + // Returns can't be analyzed, but we should run cleanup. + CantAnalyze = !isPredicated(I); + } else { + // We encountered other unrecognized terminator. Bail out immediately. + return true; } - return true; // Can't handle indirect branch. - } - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); + // Cleanup code - to be run for unpredicated unconditional branches and + // returns. + if (!isPredicated(I) && + (isUncondBranchOpcode(I->getOpcode()) || + isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode()) || + I->isReturn())) { + // Forget any previous condition branch information - it no longer applies. + Cond.clear(); + FBB = nullptr; + + // If we can modify the function, delete everything below this + // unconditional branch. + if (AllowModify) { + MachineBasicBlock::iterator DI = std::next(I); + while (DI != MBB.end()) { + MachineInstr *InstToDelete = DI; + ++DI; + InstToDelete->eraseFromParent(); + } } } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + if (CantAnalyze) + return true; - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } + if (I == MBB.begin()) + return false; - // ...likewise if it ends with a branch table followed by an unconditional - // branch. The branch folder can create these, and we must get rid of them for - // correctness of Thumb constant islands. - if ((isJumpTableBranchOpcode(SecondLastOpc) || - isIndirectBranchOpcode(SecondLastOpc)) && - isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; + --I; } - // Otherwise, can't handle this. - return true; + // We made it past the terminators without bailing out - we must have + // analyzed this branch successfully. + return false; } unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return 0; + if (!isUncondBranchOpcode(I->getOpcode()) && !isCondBranchOpcode(I->getOpcode())) return 0; @@ -404,7 +393,7 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, + ArrayRef Cond, DebugLoc DL) const { ARMFunctionInfo *AFI = MBB.getParent()->getInfo(); int BOpc = !AFI->isThumbFunction() @@ -418,7 +407,9 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "ARM branch conditions have two components!"); - if (FBB == 0) { + // For conditional branches, we use addOperand to preserve CPSR flags. + + if (!FBB) { if (Cond.empty()) { // Unconditional branch? if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); @@ -426,13 +417,13 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); } else BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + .addImm(Cond[0].getImm()).addOperand(Cond[1]); return 1; } // Two-way conditional branch. BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + .addImm(Cond[0].getImm()).addOperand(Cond[1]); if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); else @@ -464,8 +455,7 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { } bool ARMBaseInstrInfo:: -PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const { +PredicateInstruction(MachineInstr *MI, ArrayRef Pred) const { unsigned Opc = MI->getOpcode(); if (isUncondBranchOpcode(Opc)) { MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); @@ -485,9 +475,8 @@ PredicateInstruction(MachineInstr *MI, return false; } -bool ARMBaseInstrInfo:: -SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const { +bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef Pred1, + ArrayRef Pred2) const { if (Pred1.size() > 2 || Pred2.size() > 2) return false; @@ -527,6 +516,42 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } +static bool isCPSRDefined(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) + if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) + return true; + return false; +} + +static bool isEligibleForITBlock(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return true; + case ARM::tADC: // ADC (register) T1 + case ARM::tADDi3: // ADD (immediate) T1 + case ARM::tADDi8: // ADD (immediate) T2 + case ARM::tADDrr: // ADD (register) T1 + case ARM::tAND: // AND (register) T1 + case ARM::tASRri: // ASR (immediate) T1 + case ARM::tASRrr: // ASR (register) T1 + case ARM::tBIC: // BIC (register) T1 + case ARM::tEOR: // EOR (register) T1 + case ARM::tLSLri: // LSL (immediate) T1 + case ARM::tLSLrr: // LSL (register) T1 + case ARM::tLSRri: // LSR (immediate) T1 + case ARM::tLSRrr: // LSR (register) T1 + case ARM::tMUL: // MUL T1 + case ARM::tMVN: // MVN (register) T1 + case ARM::tORR: // ORR (register) T1 + case ARM::tROR: // ROR (register) T1 + case ARM::tRSB: // RSB (immediate) T1 + case ARM::tSBC: // SBC (register) T1 + case ARM::tSUBi3: // SUB (immediate) T1 + case ARM::tSUBi8: // SUB (immediate) T2 + case ARM::tSUBrr: // SUB (register) T1 + return !isCPSRDefined(MI); + } +} + /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. @@ -534,22 +559,37 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; - if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { - ARMFunctionInfo *AFI = - MI->getParent()->getParent()->getInfo(); - return AFI->isThumb2Function(); + if (!isEligibleForITBlock(MI)) + return false; + + ARMFunctionInfo *AFI = + MI->getParent()->getParent()->getInfo(); + + if (AFI->isThumb2Function()) { + if (getSubtarget().restrictIT()) + return isV8EligibleForIT(MI); + } else { // non-Thumb + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; } + return true; } -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. -LLVM_ATTRIBUTE_NOINLINE -static unsigned getNumJTEntries(const std::vector &JT, - unsigned JTI); -static unsigned getNumJTEntries(const std::vector &JT, - unsigned JTI) { - assert(JTI < JT.size()); - return JT[JTI].MBBs.size(); +namespace llvm { +template <> bool IsCPSRDead(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + if (MO.getReg() != ARM::CPSR) + continue; + if (!MO.isDead()) + return false; + } + // all definitions of CPSR are dead + return true; +} } /// GetInstSize - Return the size of the specified MachineInstr. @@ -566,15 +606,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // If this machine instr is an inline asm, measure it. if (MI->getOpcode() == ARM::INLINEASM) return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); - if (MI->isLabel()) - return 0; unsigned Opc = MI->getOpcode(); switch (Opc) { - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::PROLOG_LABEL: - case TargetOpcode::EH_LABEL: - case TargetOpcode::DBG_VALUE: + default: + // pseudo-instruction sizes are zero. return 0; case TargetOpcode::BUNDLE: return getInstBundleLength(MI); @@ -587,6 +622,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::t2MOVi32imm: return 8; case ARM::CONSTPOOL_ENTRY: + case ARM::JUMPTABLE_INSTS: + case ARM::JUMPTABLE_ADDRS: + case ARM::JUMPTABLE_TBB: + case ARM::JUMPTABLE_TBH: // If this machine instr is a constant pool entry, its size is recorded as // operand #2. return MI->getOperand(2).getImm(); @@ -601,45 +640,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: return 12; - case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - case ARM::tBR_JTr: - case ARM::t2BR_JT: - case ARM::t2TBB_JT: - case ARM::t2TBH_JT: { - // These are jumptable branches, i.e. a branch followed by an inlined - // jumptable. The size is 4 + 4 * number of entries. For TBB, each - // entry is one byte; TBH two byte each. - unsigned EntrySize = (Opc == ARM::t2TBB_JT) - ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); - unsigned NumOps = MCID.getNumOperands(); - MachineOperand JTOP = - MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); - unsigned JTI = JTOP.getIndex(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != 0); - const std::vector &JT = MJTI->getJumpTables(); - assert(JTI < JT.size()); - // Thumb instructions are 2 byte aligned, but JT entries are 4 byte - // 4 aligned. The assembler / linker may add 2 byte padding just before - // the JT entries. The size does not include this padding; the - // constant islands pass does separate bookkeeping for it. - // FIXME: If we know the size of the function is less than (1 << 16) *2 - // bytes, we can use 16-bit entries instead. Then there won't be an - // alignment issue. - unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; - unsigned NumEntries = getNumJTEntries(JT, JTI); - if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) - // Make sure the instruction that follows TBB is 2-byte aligned. - // FIXME: Constant island pass should insert an "ALIGN" instruction - // instead. - ++NumEntries; - return NumEntries * EntrySize + InstSize; - } - default: - // Otherwise, pseudo-instruction sizes are zero. - return 0; + case ARM::SPACE: + return MI->getOperand(1).getImm(); } } @@ -654,21 +656,64 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { return Size; } +void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) + : ARM::MRS; + + MachineInstrBuilder MIB = + BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); + + // There is only 1 A/R class MRS instruction, and it always refers to + // APSR. However, there are lots of other possibilities on M-class cores. + if (Subtarget.isMClass()) + MIB.addImm(0x800); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); +} + +void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) + : ARM::MSR; + + MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); + + if (Subtarget.isMClass()) + MIB.addImm(0x800); + else + MIB.addImm(8); + + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define); +} + void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { bool GPRDest = ARM::GPRRegClass.contains(DestReg); - bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); + bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); if (GPRDest && GPRSrc) { AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)))); + .addReg(SrcReg, getKillRegState(KillSrc)))); return; } bool SPRDest = ARM::SPRRegClass.contains(DestReg); - bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); + bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); unsigned Opc = 0; if (SPRDest && SPRSrc) @@ -677,7 +722,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVRS; else if (SPRDest && GPRSrc) Opc = ARM::VMOVSR; - else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP()) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; @@ -697,26 +742,57 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, int Spacing = 1; // Use VORRq when possible. - if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; - else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; + if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 2; + } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 4; // Fall back to VMOVD. - else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; - else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; - else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; - else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; - - else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; - else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; - else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; + } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { + Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; + BeginIdx = ARM::gsub_0; + SubRegs = 2; + } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + Spacing = 2; + } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + Spacing = 2; + } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + Spacing = 2; + } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) { + Opc = ARM::VMOVS; + BeginIdx = ARM::ssub_0; + SubRegs = 2; + } else if (SrcReg == ARM::CPSR) { + copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); + return; + } else if (DestReg == ARM::CPSR) { + copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); + return; + } assert(Opc && "Impossible reg-to-reg copy"); @@ -725,26 +801,28 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy register tuples backward when the first Dest reg overlaps with SrcReg. if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { - BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); Spacing = -Spacing; } #ifndef NDEBUG SmallSet DstRegs; #endif for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); assert(Dst && Src && "Bad sub-register"); #ifndef NDEBUG assert(!DstRegs.count(Src) && "destructive vector copy"); DstRegs.insert(Dst); #endif - Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src); + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); // VORR takes two source operands. if (Opc == ARM::VORRq) Mov.addReg(Src); Mov = AddDefaultPred(Mov); + // MOVr can set CC. + if (Opc == ARM::MOVr) + Mov = AddDefaultCC(Mov); } // Add implicit super-register defs and kills to the last instruction. Mov->addRegisterDefined(DestReg, TRI); @@ -775,11 +853,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), Align); switch (RC->getSize()) { case 4: @@ -965,12 +1041,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), Align); switch (RC->getSize()) { case 4: @@ -1146,12 +1219,25 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } -bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ +bool +ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineFunction &MF = *MI->getParent()->getParent(); + Reloc::Model RM = MF.getTarget().getRelocationModel(); + + if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { + assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && + "LOAD_STACK_GUARD currently supported only for MachO."); + expandLoadStackGuard(MI, RM); + MI->getParent()->erase(MI); + return true; + } + // This hook gets to expand COPY instructions before they become // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() || + Subtarget.isFPOnlySP()) return false; // Look for a copy between even S-registers. That is where we keep floats @@ -1226,7 +1312,8 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { static_cast(MCPE.Val.MachineCPVal); unsigned PCLabelId = AFI->createPICLabelUId(); - ARMConstantPoolValue *NewCPV = 0; + ARMConstantPoolValue *NewCPV = nullptr; + // FIXME: The below assumes PIC relocation model and that the function // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR @@ -1304,15 +1391,16 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const { - int Opcode = MI0->getOpcode(); + unsigned Opcode = MI0->getOpcode(); if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || - Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) { if (MI1->getOpcode() != Opcode) return false; @@ -1324,10 +1412,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, if (MO0.getOffset() != MO1.getOffset()) return false; - if (Opcode == ARM::MOV_ga_dyn || + if (Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) // Ignore the PC labels. return MO0.getGlobal() == MO1.getGlobal(); @@ -1415,9 +1504,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1434,8 +1525,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1482,7 +1575,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, if ((Offset2 - Offset1) / 8 > 64) return false; - if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + // Check if the machine opcodes are different. If they are different + // then we consider them to not be of the same base address, + // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. + // In this case, they are considered to be the same because they are different + // encoding forms of the same basic instruction. + if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && + !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && + Load2->getMachineOpcode() == ARM::t2LDRBi12) || + (Load1->getMachineOpcode() == ARM::t2LDRBi12 && + Load2->getMachineOpcode() == ARM::t2LDRBi8))) return false; // FIXME: overly conservative? // Four loads in a row should be sufficient. @@ -1505,7 +1607,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; // Terminators and labels can't be scheduled around. - if (MI->isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isPosition()) return true; // Treat the start of the IT block as a scheduling boundary, but schedule @@ -1538,17 +1640,44 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, bool ARMBaseInstrInfo:: isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { if (!NumCycles) return false; - // Attempt to estimate the relative costs of predication versus branching. - unsigned UnpredCost = Probability.getNumerator() * NumCycles; - UnpredCost /= Probability.getDenominator(); - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; + // If we are optimizing for size, see if the branch in the predecessor can be + // lowered to cbn?z by the constant island lowering pass, and return false if + // so. This results in a shorter instruction sequence. + if (MBB.getParent()->getFunction()->optForSize()) { + MachineBasicBlock *Pred = *MBB.pred_begin(); + if (!Pred->empty()) { + MachineInstr *LastMI = &*Pred->rbegin(); + if (LastMI->getOpcode() == ARM::t2Bcc) { + MachineBasicBlock::iterator CmpMI = LastMI; + if (CmpMI != Pred->begin()) { + --CmpMI; + if (CmpMI->getOpcode() == ARM::tCMPi8 || + CmpMI->getOpcode() == ARM::t2CMPri) { + unsigned Reg = CmpMI->getOperand(0).getReg(); + unsigned PredReg = 0; + ARMCC::CondCodes P = getInstrPredicate(CmpMI, PredReg); + if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && + isARMLowRegister(Reg)) + return false; + } + } + } + } + } - return (NumCycles + ExtraPredCycles) <= UnpredCost; + // Attempt to estimate the relative costs of predication versus branching. + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling NumCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor); + UnpredCost += ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + + return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost; } bool ARMBaseInstrInfo:: @@ -1556,23 +1685,22 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned TCycles, unsigned TExtra, MachineBasicBlock &FMBB, unsigned FCycles, unsigned FExtra, - const BranchProbability &Probability) const { + BranchProbability Probability) const { if (!TCycles || !FCycles) return false; // Attempt to estimate the relative costs of predication versus branching. - unsigned TUnpredCost = Probability.getNumerator() * TCycles; - TUnpredCost /= Probability.getDenominator(); - - uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); - unsigned FUnpredCost = Comp * FCycles; - FUnpredCost /= Probability.getDenominator(); - + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling TCycles/FCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); + unsigned FUnpredCost = + Probability.getCompl().scale(FCycles * ScalingUpFactor); unsigned UnpredCost = TUnpredCost + FUnpredCost; - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; + UnpredCost += 1 * ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; - return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; + return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; } bool @@ -1599,7 +1727,7 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { } -int llvm::getMatchingCondBranchOpcode(int Opc) { +unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { if (Opc == ARM::B) return ARM::Bcc; if (Opc == ARM::tB) @@ -1621,10 +1749,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return NULL; + return nullptr; MI = TargetInstrInfo::commuteInstruction(MI, NewMI); if (!MI) - return NULL; + return nullptr; // After swapping the MOVCC operands, also invert the condition. MI->getOperand(MI->findFirstPredOperandIdx()) .setImm(ARMCC::getOppositeCondition(CC)); @@ -1640,35 +1768,35 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) - return 0; + return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) - return 0; + return nullptr; MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) - return 0; + return nullptr; // MI is folded into the MOVCC by predicating it. if (!MI->isPredicable()) - return 0; + return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); // Reject frame index operands, PEI can't handle the predicated pseudos. if (MO.isFI() || MO.isCPI() || MO.isJTI()) - return 0; + return nullptr; if (!MO.isReg()) continue; // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) - return 0; + return nullptr; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - return 0; + return nullptr; if (MO.isDef() && !MO.isDead()) - return 0; + return nullptr; } bool DontMoveAcrossStores = true; - if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) - return 0; + if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores)) + return nullptr; return MI; } @@ -1693,23 +1821,31 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, return false; } -MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, - bool PreferFalse) const { +MachineInstr * +ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl &SeenMIs, + bool PreferFalse) const { assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); bool Invert = !DefMI; if (!DefMI) DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); if (!DefMI) - return 0; + return nullptr; + + // Find new register class to use. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + unsigned DestReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); + if (!MRI.constrainRegClass(DestReg, PreviousClass)) + return nullptr; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - DefMI->getDesc(), - MI->getOperand(0).getReg()); + DefMI->getDesc(), DestReg); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1732,11 +1868,21 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // register operand tied to the first def. // The tie makes the register allocator ensure the FalseReg is allocated the // same register as operand 0. - MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); FalseReg.setImplicit(); NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // Update SeenMIs set: register newly created MI and erase removed DefMI. + SeenMIs.insert(NewMI); + SeenMIs.erase(DefMI); + + // If MI is inside a loop, and DefMI is outside the loop, then kill flags on + // DefMI would be invalid when tranferred inside the loop. Checking for a + // loop is expensive, but at least remove kill flags if they are in different + // BBs. + if (DefMI->getParent() != MI->getParent()) + NewMI->clearKillInfo(); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -1792,6 +1938,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags) { + if (NumBytes == 0 && DestReg != BaseReg) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); + return; + } + bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -1815,6 +1969,125 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } +static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI, + MachineInstr *MI) { + for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true); + Subreg.isValid(); ++Subreg) + if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) != + MachineBasicBlock::LQR_Dead) + return true; + return false; +} +bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, + MachineFunction &MF, MachineInstr *MI, + unsigned NumBytes) { + // This optimisation potentially adds lots of load and store + // micro-operations, it's only really a great benefit to code-size. + if (!MF.getFunction()->optForMinSize()) + return false; + + // If only one register is pushed/popped, LLVM can use an LDR/STR + // instead. We can't modify those so make sure we're dealing with an + // instruction we understand. + bool IsPop = isPopOpcode(MI->getOpcode()); + bool IsPush = isPushOpcode(MI->getOpcode()); + if (!IsPush && !IsPop) + return false; + + bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || + MI->getOpcode() == ARM::VLDMDIA_UPD; + bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || + MI->getOpcode() == ARM::tPOP || + MI->getOpcode() == ARM::tPOP_RET; + + assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && + MI->getOperand(1).getReg() == ARM::SP)) && + "trying to fold sp update into non-sp-updating push/pop"); + + // The VFP push & pop act on D-registers, so we can only fold an adjustment + // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try + // if this is violated. + if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) + return false; + + // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ + // pred) so the list starts at 4. Thumb1 starts after the predicate. + int RegListIdx = IsT1PushPop ? 2 : 4; + + // Calculate the space we'll need in terms of registers. + unsigned FirstReg = MI->getOperand(RegListIdx).getReg(); + unsigned RD0Reg, RegsNeeded; + if (IsVFPPushPop) { + RD0Reg = ARM::D0; + RegsNeeded = NumBytes / 8; + } else { + RD0Reg = ARM::R0; + RegsNeeded = NumBytes / 4; + } + + // We're going to have to strip all list operands off before + // re-adding them since the order matters, so save the existing ones + // for later. + SmallVector RegList; + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + RegList.push_back(MI->getOperand(i)); + + const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + + // Now try to find enough space in the reglist to allocate NumBytes. + for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded; + --CurReg) { + if (!IsPop) { + // Pushing any register is completely harmless, mark the + // register involved as undef since we don't care about it in + // the slightest. + RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, + false, false, true)); + --RegsNeeded; + continue; + } + + // However, we can only pop an extra register if it's not live. For + // registers live within the function we might clobber a return value + // register; the other way a register can be live here is if it's + // callee-saved. + // TODO: Currently, computeRegisterLiveness() does not report "live" if a + // sub reg is live. When computeRegisterLiveness() works for sub reg, it + // can replace isAnySubRegLive(). + if (isCalleeSavedRegister(CurReg, CSRegs) || + isAnySubRegLive(CurReg, TRI, MI)) { + // VFP pops don't allow holes in the register list, so any skip is fatal + // for our transformation. GPR pops do, so we should just keep looking. + if (IsVFPPushPop) + return false; + else + continue; + } + + // Mark the unimportant registers as in the POP. + RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, + true)); + --RegsNeeded; + } + + if (RegsNeeded > 0) + return false; + + // Finally we know we can profitably perform the optimisation so go + // ahead: strip all existing registers off and add them back again + // in the right order. + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + MI->RemoveOperand(i); + + // Add the complete list back in. + MachineInstrBuilder MIB(MF, &*MI); + for (int i = RegList.size() - 1; i >= 0; --i) + MIB.addOperand(RegList[i]); + + return true; +} + bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { @@ -2004,16 +2277,6 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) return true; break; - case ARM::COPY: { - // Walk down one instruction which is potentially an 'and'. - const MachineInstr &Copy = *MI; - MachineBasicBlock::iterator AND( - llvm::next(MachineBasicBlock::iterator(MI))); - if (AND == MI->getParent()->end()) return false; - MI = AND; - return isSuitableForMask(MI, Copy.getOperand(0).getReg(), - CmpMask, true); - } } return false; @@ -2084,9 +2347,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { - MI = 0; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), - UE = MRI->use_end(); UI != UE; ++UI) { + MI = nullptr; + for (MachineRegisterInfo::use_instr_iterator + UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); + UI != UE; ++UI) { if (UI->getParent() != CmpInstr->getParent()) continue; MachineInstr *PotentialAND = &*UI; if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || @@ -2110,17 +2374,18 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. - MI = NULL; + MI = nullptr; else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. - // For CMPri, we need to check Sub, thus we can't return here. + // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. + // Thus we cannot return here. if (CmpInstr->getOpcode() == ARM::CMPri || CmpInstr->getOpcode() == ARM::t2CMPri) - MI = NULL; + MI = nullptr; else return false; } @@ -2196,8 +2461,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, case ARM::t2EORrr: case ARM::t2EORri: { // Scan forward for the use of CPSR - // When checking against MI: if it's a conditional code requires - // checking of V bit, then this is not safe to do. + // When checking against MI: if it's a conditional code that requires + // checking of the V bit or C bit, then this is not safe to do. // It is safe to remove CmpInstr if CPSR is redefined or killed. // If we are done with the basic block, we need to check whether CPSR is // live-out. @@ -2221,8 +2486,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, isSafe = true; break; } - // Condition code is after the operand before CPSR. - ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + // Condition code is after the operand before CPSR except for VSELs. + ARMCC::CondCodes CC; + bool IsInstrVSel = true; + switch (Instr.getOpcode()) { + default: + IsInstrVSel = false; + CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); + break; + case ARM::VSELEQD: + case ARM::VSELEQS: + CC = ARMCC::EQ; + break; + case ARM::VSELGTD: + case ARM::VSELGTS: + CC = ARMCC::GT; + break; + case ARM::VSELGED: + case ARM::VSELGES: + CC = ARMCC::GE; + break; + case ARM::VSELVSS: + case ARM::VSELVSD: + CC = ARMCC::VS; + break; + } + if (Sub) { ARMCC::CondCodes NewCC = getSwappedCondition(CC); if (NewCC == ARMCC::AL) @@ -2233,23 +2522,37 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If it is safe to remove CmpInstr, the condition code of these // operands will be modified. if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), - NewCC)); - } - else + Sub->getOperand(2).getReg() == SrcReg) { + // VSel doesn't support condition code update. + if (IsInstrVSel) + return false; + OperandsToUpdate.push_back( + std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); + } + } else { + // No Sub, so this is x = y, z; cmp x, 0. switch (CC) { - default: + case ARMCC::EQ: // Z + case ARMCC::NE: // Z + case ARMCC::MI: // N + case ARMCC::PL: // N + case ARMCC::AL: // none // CPSR can be used multiple times, we should continue. break; - case ARMCC::VS: - case ARMCC::VC: - case ARMCC::GE: - case ARMCC::LT: - case ARMCC::GT: - case ARMCC::LE: + case ARMCC::HS: // C + case ARMCC::LO: // C + case ARMCC::VS: // V + case ARMCC::VC: // V + case ARMCC::HI: // C Z + case ARMCC::LS: // C Z + case ARMCC::GE: // N V + case ARMCC::LT: // N V + case ARMCC::GT: // Z N V + case ARMCC::LE: // Z N V + // The instruction uses the V bit or C bit which is not safe. return false; } + } } } @@ -2626,7 +2929,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, // FIXME: The current MachineInstr design does not support relying on machine // mem operands to determine the width of a memory access. Instead, we expect // the target to provide this information based on the instruction opcode and -// operands. However, using MachineMemOperand is a the best solution now for +// operands. However, using MachineMemOperand is the best solution now for // two reasons: // // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI @@ -2769,7 +3072,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, break; } return UOps; - } else if (Subtarget.isCortexA8()) { + } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. @@ -2806,7 +3109,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(DefClass, DefIdx); int DefCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 DefCycle = RegNo / 2 + 1; if (RegNo % 2) @@ -2847,7 +3150,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(DefClass, DefIdx); int DefCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // 4 registers would be issued: 1, 2, 1. // 5 registers would be issued: 1, 2, 2. DefCycle = RegNo / 2; @@ -2881,7 +3184,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(UseClass, UseIdx); int UseCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 UseCycle = RegNo / 2 + 1; if (RegNo % 2) @@ -2921,7 +3224,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(UseClass, UseIdx); int UseCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { UseCycle = RegNo / 2; if (UseCycle < 2) UseCycle = 2; @@ -3058,8 +3361,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, Dist = 0; MachineBasicBlock::const_iterator I = MI; ++I; - MachineBasicBlock::const_instr_iterator II = - llvm::prior(I.getInstrIterator()); + MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); assert(II->isInsideBundle() && "Empty bundle?"); int Idx = -1; @@ -3098,7 +3400,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, if (Idx == -1) { Dist = 0; - return 0; + return nullptr; } UseIdx = Idx; @@ -3112,7 +3414,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr *DefMI, const MCInstrDesc *DefMCID, unsigned DefAlign) { int Adjust = 0; - if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { + if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID->getOpcode()) { @@ -3344,9 +3646,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) + // FIXME: Use Function::optForSize(). + if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) --Latency; } return Latency; @@ -3413,7 +3714,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, UseMCID, UseIdx, UseAlign); if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { + (Subtarget.isCortexA8() || Subtarget.isLikeA9() || + Subtarget.isCortexA7())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID.getOpcode()) { @@ -3506,6 +3808,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: + case ARM::VLD1d64TPseudoWB_fixed: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -3522,6 +3825,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: + case ARM::VLD1d64QPseudoWB_fixed: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: @@ -3593,6 +3897,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const { + if (MI->isCopyLike() || MI->isInsertSubreg() || + MI->isRegSequence() || MI->isImplicitDef()) + return 0; + + if (MI->isBundle()) + return 0; + + const MCInstrDesc &MCID = MI->getDesc(); + + if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + // When predicated, CPSR is an additional source operand for CPSR updating + // instructions, this apparently increases their latencies. + return 1; + } + return 0; +} + unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -3662,7 +3984,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } bool ARMBaseInstrInfo:: -hasHighOperandLatency(const InstrItineraryData *ItinData, +hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const { @@ -3674,9 +3996,8 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, return true; // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); - if (Latency < 0) - Latency = getInstrLatency(ItinData, DefMI); + unsigned Latency + = SchedModel.computeOperandLatency(DefMI, DefIdx, UseMI, UseIdx); if (Latency <= 3) return false; return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || @@ -3684,8 +4005,9 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, } bool ARMBaseInstrInfo:: -hasLowDefLatency(const InstrItineraryData *ItinData, +hasLowDefLatency(const TargetSchedModel &SchedModel, const MachineInstr *DefMI, unsigned DefIdx) const { + const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); if (!ItinData || ItinData->isEmpty()) return false; @@ -3707,6 +4029,38 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, return true; } +// LoadStackGuard has so far only been implemented for MachO. Different code +// sequence is needed for other targets. +void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, + unsigned LoadImmOpc, + unsigned LoadOpc, + Reloc::Model RM) const { + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = MI->getOperand(0).getReg(); + const GlobalValue *GV = + cast((*MI->memoperands_begin())->getValue()); + MachineInstrBuilder MIB; + + BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) + .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); + + if (Subtarget.GVIsIndirectSymbol(GV, RM)) { + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4); + MIB.addMemOperand(MMO); + AddDefaultPred(MIB); + } + + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + AddDefaultPred(MIB); +} + bool ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, unsigned &AddSubOpc, @@ -3744,19 +4098,21 @@ enum ARMExeDomain { // std::pair ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { - // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON - // if they are not predicated. - if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) - return std::make_pair(ExeVFP, (1<getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR || - MI->getOpcode() == ARM::VMOVS)) - return std::make_pair(ExeVFP, (1<getOpcode() == ARM::VMOVD && !isPredicated(MI)) + return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); + + // CortexA9 is particularly picky about mixing the two and wants these + // converted. + if (Subtarget.isCortexA9() && !isPredicated(MI) && + (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) + return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); + } // No other instructions can be swizzled, so just determine their domain. unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; @@ -3849,6 +4205,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + // Make sure we've got NEON instructions. + assert(Subtarget.hasNEON() && "VORRd requires NEON"); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); @@ -4125,7 +4484,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines // the full D-register by loading the same value to both lanes. The // instruction is micro-coded with 2 uops, so don't do this until we can - // properly schedule micro-coded instuctions. The dispatcher stalls cause + // properly schedule micro-coded instructions. The dispatcher stalls cause // too big regressions. // Insert the dependency-breaking FCONSTD before MI. @@ -4136,7 +4495,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, } bool ARMBaseInstrInfo::hasNOP() const { - return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; + return Subtarget.getFeatureBits()[ARM::HasV6KOps]; } bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { @@ -4152,3 +4511,72 @@ bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { return false; } + +bool ARMBaseInstrInfo::getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl &InputRegs) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVDRR: + // dX = VMOVDRR rY, rZ + // is the same as: + // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 + // Populate the InputRegs accordingly. + // rY + const MachineOperand *MOReg = &MI.getOperand(1); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); + // rZ + MOReg = &MI.getOperand(2); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getExtractSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVRRD: + // rX, rY = VMOVRRD dZ + // is the same as: + // rX = EXTRACT_SUBREG dZ, ssub_0 + // rY = EXTRACT_SUBREG dZ, ssub_1 + const MachineOperand &MOReg = MI.getOperand(2); + InputReg.Reg = MOReg.getReg(); + InputReg.SubReg = MOReg.getSubReg(); + InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getInsertSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VSETLNi32: + // dX = VSETLNi32 dY, rZ, imm + const MachineOperand &MOBaseReg = MI.getOperand(1); + const MachineOperand &MOInsertedReg = MI.getOperand(2); + const MachineOperand &MOIndex = MI.getOperand(3); + BaseReg.Reg = MOBaseReg.getReg(); + BaseReg.SubReg = MOBaseReg.getSubReg(); + + InsertedReg.Reg = MOInsertedReg.getReg(); + InsertedReg.SubReg = MOInsertedReg.getSubReg(); + InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +}