X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMBaseInstrInfo.cpp;h=abb1fdad080f43b933e71160d68c4248112d08e2;hb=4d6cb933eb23e4dc167b04a981a8434da2ab13c5;hp=0d38b2ab1995960bee9a73579e4432202d9e06d1;hpb=c3f2ad087923d6570f0e3a6ea1e004a892a5ef15;p=oota-llvm.git

diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0d38b2ab199..abb1fdad080 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -37,6 +38,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -365,14 +367,10 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 
 unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return 0;
-    --I;
-  }
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return 0;
+
   if (!isUncondBranchOpcode(I->getOpcode()) &&
       !isCondBranchOpcode(I->getOpcode()))
     return 0;
 
@@ -395,7 +393,7 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 unsigned
 ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                MachineBasicBlock *FBB,
-                               const SmallVectorImpl<MachineOperand> &Cond,
+                               ArrayRef<MachineOperand> Cond,
                                DebugLoc DL) const {
   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
   int BOpc   = !AFI->isThumbFunction()
@@ -409,6 +407,8 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   assert((Cond.size() == 2 || Cond.size() == 0) &&
          "ARM branch conditions have two components!");
 
+  // For conditional branches, we use addOperand to preserve CPSR flags.
+
   if (!FBB) {
     if (Cond.empty()) { // Unconditional branch?
       if (isThumb)
@@ -417,13 +417,13 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
         BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
     } else
       BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
-        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+        .addImm(Cond[0].getImm()).addOperand(Cond[1]);
     return 1;
   }
 
   // Two-way conditional branch.
   BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
-    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+    .addImm(Cond[0].getImm()).addOperand(Cond[1]);
   if (isThumb)
     BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
   else
@@ -455,8 +455,7 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
 }
 
 bool ARMBaseInstrInfo::
-PredicateInstruction(MachineInstr *MI,
-                     const SmallVectorImpl<MachineOperand> &Pred) const {
+PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
   unsigned Opc = MI->getOpcode();
   if (isUncondBranchOpcode(Opc)) {
     MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
@@ -476,9 +475,8 @@ PredicateInstruction,
   return false;
 }
 
-bool ARMBaseInstrInfo::
-SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-                  const SmallVectorImpl<MachineOperand> &Pred2) const {
+bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+                                         ArrayRef<MachineOperand> Pred2) const {
   if (Pred1.size() > 2 || Pred2.size() > 2)
     return false;
 
@@ -520,7 +518,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
 
 static bool isCPSRDefined(const MachineInstr *MI) {
   for (const auto &MO : MI->operands())
-    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
+    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
       return true;
   return false;
 }
@@ -594,16 +592,6 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
   }
 }
 
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
-LLVM_ATTRIBUTE_NOINLINE
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
-                                unsigned JTI);
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
-                                unsigned JTI) {
-  assert(JTI < JT.size());
-  return JT[JTI].MBBs.size();
-}
-
 /// GetInstSize - Return the size of the specified MachineInstr.
 ///
 unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
@@ -634,6 +622,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case ARM::t2MOVi32imm:
     return 8;
   case ARM::CONSTPOOL_ENTRY:
+  case ARM::JUMPTABLE_INSTS:
+  case ARM::JUMPTABLE_ADDRS:
+  case ARM::JUMPTABLE_TBB:
+  case ARM::JUMPTABLE_TBH:
     // If this machine instr is a constant pool entry, its size is recorded as
     // operand #2.
     return MI->getOperand(2).getImm();
@@ -648,42 +640,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case ARM::t2Int_eh_sjlj_setjmp:
   case ARM::t2Int_eh_sjlj_setjmp_nofp:
     return 12;
-  case ARM::BR_JTr:
-  case ARM::BR_JTm:
-  case ARM::BR_JTadd:
-  case ARM::tBR_JTr:
-  case ARM::t2BR_JT:
-  case ARM::t2TBB_JT:
-  case ARM::t2TBH_JT: {
-    // These are jumptable branches, i.e. a branch followed by an inlined
-    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
-    // entry is one byte; TBH two byte each.
-    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
-      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
-    unsigned NumOps = MCID.getNumOperands();
-    MachineOperand JTOP =
-      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
-    unsigned JTI = JTOP.getIndex();
-    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-    assert(MJTI != nullptr);
-    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-    assert(JTI < JT.size());
-    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
-    // 4 aligned. The assembler / linker may add 2 byte padding just before
-    // the JT entries.  The size does not include this padding; the
-    // constant islands pass does separate bookkeeping for it.
-    // FIXME: If we know the size of the function is less than (1 << 16) *2
-    // bytes, we can use 16-bit entries instead. Then there won't be an
-    // alignment issue.
-    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
-    unsigned NumEntries = getNumJTEntries(JT, JTI);
-    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
-      // Make sure the instruction that follows TBB is 2-byte aligned.
-      // FIXME: Constant island pass should insert an "ALIGN" instruction
-      // instead.
-      ++NumEntries;
-    return NumEntries * EntrySize + InstSize;
-  }
+  case ARM::SPACE:
+    return MI->getOperand(1).getImm();
   }
 }
 
@@ -698,6 +656,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
   return Size;
 }
 
+void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned DestReg, bool KillSrc,
+                                    const ARMSubtarget &Subtarget) const {
+  unsigned Opc = Subtarget.isThumb()
+                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
+                     : ARM::MRS;
+
+  MachineInstrBuilder MIB =
+      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
+
+  // There is only 1 A/R class MRS instruction, and it always refers to
+  // APSR. However, there are lots of other possibilities on M-class cores.
+  if (Subtarget.isMClass())
+    MIB.addImm(0x800);
+
+  AddDefaultPred(MIB);
+
+  MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+}
+
+void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned SrcReg, bool KillSrc,
+                                  const ARMSubtarget &Subtarget) const {
+  unsigned Opc = Subtarget.isThumb()
+                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
+                     : ARM::MSR;
+
+  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+
+  if (Subtarget.isMClass())
+    MIB.addImm(0x800);
+  else
+    MIB.addImm(8);
+
+  MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+  AddDefaultPred(MIB);
+
+  MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+}
+
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DestReg, unsigned SrcReg,
@@ -785,6 +786,12 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = ARM::VMOVS;
     BeginIdx = ARM::ssub_0;
     SubRegs = 2;
+  } else if (SrcReg == ARM::CPSR) {
+    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
+    return;
+  } else if (DestReg == ARM::CPSR) {
+    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
+    return;
   }
 
   assert(Opc && "Impossible reg-to-reg copy");
@@ -846,11 +853,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
-                            MachineMemOperand::MOStore,
-                            MFI.getObjectSize(FI),
-                            Align);
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), Align);
 
   switch (RC->getSize()) {
     case 4:
@@ -1036,12 +1041,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(
-                    MachinePointerInfo::getFixedStack(FI),
-                            MachineMemOperand::MOLoad,
-                            MFI.getObjectSize(FI),
-                            Align);
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), Align);
 
   switch (RC->getSize()) {
   case 4:
@@ -1223,8 +1225,7 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   Reloc::Model RM = MF.getTarget().getRelocationModel();
 
   if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
-    assert(getSubtarget().getTargetTriple().getObjectFormat() ==
-           Triple::MachO &&
+    assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
            "LOAD_STACK_GUARD currently supported only for MachO.");
     expandLoadStackGuard(MI, RM);
     MI->getParent()->erase(MI);
@@ -1390,7 +1391,7 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                         const MachineInstr *MI1,
                                         const MachineRegisterInfo *MRI) const {
-  int Opcode = MI0->getOpcode();
+  unsigned Opcode = MI0->getOpcode();
   if (Opcode == ARM::t2LDRpci ||
       Opcode == ARM::t2LDRpci_pic ||
       Opcode == ARM::tLDRpci ||
@@ -1639,17 +1640,44 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
 
 bool ARMBaseInstrInfo::
 isProfitableToIfCvt(MachineBasicBlock &MBB,
                     unsigned NumCycles, unsigned ExtraPredCycles,
-                    const BranchProbability &Probability) const {
+                    BranchProbability Probability) const {
   if (!NumCycles)
     return false;
 
-  // Attempt to estimate the relative costs of predication versus branching.
-  unsigned UnpredCost = Probability.getNumerator() * NumCycles;
-  UnpredCost /= Probability.getDenominator();
-  UnpredCost += 1; // The branch itself
-  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+  // If we are optimizing for size, see if the branch in the predecessor can be
+  // lowered to cbn?z by the constant island lowering pass, and return false if
+  // so. This results in a shorter instruction sequence.
+  if (MBB.getParent()->getFunction()->optForSize()) {
+    MachineBasicBlock *Pred = *MBB.pred_begin();
+    if (!Pred->empty()) {
+      MachineInstr *LastMI = &*Pred->rbegin();
+      if (LastMI->getOpcode() == ARM::t2Bcc) {
+        MachineBasicBlock::iterator CmpMI = LastMI;
+        if (CmpMI != Pred->begin()) {
+          --CmpMI;
+          if (CmpMI->getOpcode() == ARM::tCMPi8 ||
+              CmpMI->getOpcode() == ARM::t2CMPri) {
+            unsigned Reg = CmpMI->getOperand(0).getReg();
+            unsigned PredReg = 0;
+            ARMCC::CondCodes P = getInstrPredicate(CmpMI, PredReg);
+            if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
+                isARMLowRegister(Reg))
+              return false;
+          }
+        }
+      }
+    }
+  }
 
-  return (NumCycles + ExtraPredCycles) <= UnpredCost;
+  // Attempt to estimate the relative costs of predication versus branching.
+  // Here we scale up each component of UnpredCost to avoid precision issue when
+  // scaling NumCycles by Probability.
+  const unsigned ScalingUpFactor = 1024;
+  unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
+  UnpredCost += ScalingUpFactor; // The branch itself
+  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+
+  return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
 }
 
 bool ARMBaseInstrInfo::
@@ -1657,23 +1685,22 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
                     unsigned TCycles, unsigned TExtra,
                     MachineBasicBlock &FMBB,
                     unsigned FCycles, unsigned FExtra,
-                    const BranchProbability &Probability) const {
+                    BranchProbability Probability) const {
   if (!TCycles || !FCycles)
     return false;
 
   // Attempt to estimate the relative costs of predication versus branching.
-  unsigned TUnpredCost = Probability.getNumerator() * TCycles;
-  TUnpredCost /= Probability.getDenominator();
-
-  uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
-  unsigned FUnpredCost = Comp * FCycles;
-  FUnpredCost /= Probability.getDenominator();
-
+  // Here we scale up each component of UnpredCost to avoid precision issue when
+  // scaling TCycles/FCycles by Probability.
+  const unsigned ScalingUpFactor = 1024;
+  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+  unsigned FUnpredCost =
+      Probability.getCompl().scale(FCycles * ScalingUpFactor);
   unsigned UnpredCost = TUnpredCost + FUnpredCost;
-  UnpredCost += 1; // The branch itself
-  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+  UnpredCost += 1 * ScalingUpFactor; // The branch itself
+  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
 
-  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+  return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
 }
 
 bool
@@ -1700,7 +1727,7 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
 }
 
 
-int llvm::getMatchingCondBranchOpcode(int Opc) {
+unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
   if (Opc == ARM::B)
     return ARM::Bcc;
   if (Opc == ARM::tB)
@@ -1768,8 +1795,7 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
       return nullptr;
   }
   bool DontMoveAcrossStores = true;
-  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
-                        DontMoveAcrossStores))
+  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
     return nullptr;
   return MI;
 }
@@ -1795,8 +1821,10 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
   return false;
 }
 
-MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
-                                               bool PreferFalse) const {
+MachineInstr *
+ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
                                 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+                                 bool PreferFalse) const {
   assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
          "Unknown select instruction");
   MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -1844,6 +1872,17 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
   NewMI.addOperand(FalseReg);
   NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
 
+  // Update SeenMIs set: register newly created MI and erase removed DefMI.
+  SeenMIs.insert(NewMI);
+  SeenMIs.erase(DefMI);
+
+  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
+  // DefMI would be invalid when transferred inside the loop.  Checking for a
+  // loop is expensive, but at least remove kill flags if they are in different
+  // BBs.
+  if (DefMI->getParent() != MI->getParent())
+    NewMI->clearKillInfo();
+
   // The caller will erase MI, but not DefMI.
   DefMI->eraseFromParent();
   return NewMI;
@@ -1944,8 +1983,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction()->getAttributes().hasAttribute(
-          AttributeSet::FunctionIndex, Attribute::MinSize))
+  if (!MF.getFunction()->optForMinSize())
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2239,16 +2277,6 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
     if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
       return true;
     break;
-  case ARM::COPY: {
-    // Walk down one instruction which is potentially an 'and'.
-    const MachineInstr &Copy = *MI;
-    MachineBasicBlock::iterator AND(
-      std::next(MachineBasicBlock::iterator(MI)));
-    if (AND == MI->getParent()->end()) return false;
-    MI = AND;
-    return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
-                             CmpMask, true);
-  }
   }
 
   return false;
@@ -2353,7 +2381,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
     // Conservatively refuse to convert an instruction which isn't in the same
     // BB as the comparison.
-    // For CMPri, we need to check Sub, thus we can't return here.
+    // For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
+    // Thus we cannot return here.
     if (CmpInstr->getOpcode() == ARM::CMPri ||
        CmpInstr->getOpcode() == ARM::t2CMPri)
       MI = nullptr;
@@ -2432,8 +2461,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   case ARM::t2EORrr:
   case ARM::t2EORri: {
     // Scan forward for the use of CPSR
-    // When checking against MI: if it's a conditional code requires
-    // checking of V bit, then this is not safe to do.
+    // When checking against MI: if it's a conditional code that requires
+    // checking of the V bit or C bit, then this is not safe to do.
     // It is safe to remove CmpInstr if CPSR is redefined or killed.
    // If we are done with the basic block, we need to check whether CPSR is
     // live-out.
@@ -2500,19 +2529,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
             OperandsToUpdate.push_back(
                 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
           }
-        } else
+        } else {
+          // No Sub, so this is x = <op> y, z; cmp x, 0.
           switch (CC) {
-          default:
+          case ARMCC::EQ: // Z
+          case ARMCC::NE: // Z
+          case ARMCC::MI: // N
+          case ARMCC::PL: // N
+          case ARMCC::AL: // none
            // CPSR can be used multiple times, we should continue.
             break;
-          case ARMCC::VS:
-          case ARMCC::VC:
-          case ARMCC::GE:
-          case ARMCC::LT:
-          case ARMCC::GT:
-          case ARMCC::LE:
+          case ARMCC::HS: // C
+          case ARMCC::LO: // C
+          case ARMCC::VS: // V
+          case ARMCC::VC: // V
+          case ARMCC::HI: // C Z
+          case ARMCC::LS: // C Z
+          case ARMCC::GE: // N V
+          case ARMCC::LT: // N V
+          case ARMCC::GT: // Z N V
+          case ARMCC::LE: // Z N V
+            // The instruction uses the V bit or C bit which is not safe.
             return false;
           }
+        }
       }
     }
 
@@ -2889,7 +2929,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
 // FIXME: The current MachineInstr design does not support relying on machine
 // mem operands to determine the width of a memory access. Instead, we expect
 // the target to provide this information based on the instruction opcode and
-// operands. However, using MachineMemOperand is a the best solution now for
+// operands. However, using MachineMemOperand is the best solution now for
 // two reasons:
 //
 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
@@ -3606,9 +3646,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   // instructions).
   if (Latency > 0 && Subtarget.isThumb2()) {
     const MachineFunction *MF = DefMI->getParent()->getParent();
-    if (MF->getFunction()->getAttributes().
-          hasAttribute(AttributeSet::FunctionIndex,
-                       Attribute::OptimizeForSize))
+    // FIXME: Use Function::optForSize().
+    if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
       --Latency;
   }
   return Latency;
@@ -3945,7 +3984,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
 }
 
 bool ARMBaseInstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
+hasHighOperandLatency(const TargetSchedModel &SchedModel,
                       const MachineRegisterInfo *MRI,
                       const MachineInstr *DefMI, unsigned DefIdx,
                       const MachineInstr *UseMI, unsigned UseIdx) const {
@@ -3957,9 +3996,8 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
     return true;
 
   // Hoist VFP / NEON instructions with 4 or higher latency.
-  int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
-  if (Latency < 0)
-    Latency = getInstrLatency(ItinData, DefMI);
+  unsigned Latency
+    = SchedModel.computeOperandLatency(DefMI, DefIdx, UseMI, UseIdx);
   if (Latency <= 3)
     return false;
   return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
@@ -3967,8 +4005,9 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
 }
 
 bool ARMBaseInstrInfo::
-hasLowDefLatency(const InstrItineraryData *ItinData,
+hasLowDefLatency(const TargetSchedModel &SchedModel,
                  const MachineInstr *DefMI, unsigned DefIdx) const {
+  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
   if (!ItinData || ItinData->isEmpty())
     return false;
 
@@ -4010,8 +4049,8 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
   MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
   MIB.addReg(Reg, RegState::Kill).addImm(0);
   unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
-  MachineMemOperand *MMO = MBB.getParent()->
-      getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4);
+  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+      MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
   MIB.addMemOperand(MMO);
   AddDefaultPred(MIB);
 }
@@ -4059,19 +4098,21 @@ enum ARMExeDomain {
 //
 std::pair<uint16_t, uint16_t>
 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
-  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
-  // if they are not predicated.
-  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
-    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
-  // CortexA9 is particularly picky about mixing the two and wants these
-  // converted.
-  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
-      (MI->getOpcode() == ARM::VMOVRS ||
-       MI->getOpcode() == ARM::VMOVSR ||
-       MI->getOpcode() == ARM::VMOVS))
-    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+  if (Subtarget.hasNEON()) {
+    // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to
+    // NEON if they are not predicated.
+    if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+
+    // CortexA9 is particularly picky about mixing the two and wants these
+    // converted.
+    if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+        (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
+         MI->getOpcode() == ARM::VMOVS))
+      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+  }
 
   // No other instructions can be swizzled, so just determine their domain.
   unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@@ -4164,6 +4205,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
     // Zap the predicate operands.
     assert(!isPredicated(MI) && "Cannot predicate a VORRd");
 
+    // Make sure we've got NEON instructions.
+    assert(Subtarget.hasNEON() && "VORRd requires NEON");
+
     // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
     DstReg = MI->getOperand(0).getReg();
     SrcReg = MI->getOperand(1).getReg();
@@ -4450,31 +4494,8 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
   MI->addRegisterKilled(DReg, TRI, true);
 }
 
-void ARMBaseInstrInfo::getUnconditionalBranch(
-    MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
-  if (Subtarget.isThumb())
-    Branch.setOpcode(ARM::tB);
-  else if (Subtarget.isThumb2())
-    Branch.setOpcode(ARM::t2B);
-  else
-    Branch.setOpcode(ARM::Bcc);
-
-  Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
-  Branch.addOperand(MCOperand::CreateImm(ARMCC::AL));
-  Branch.addOperand(MCOperand::CreateReg(0));
-}
-
-void ARMBaseInstrInfo::getTrap(MCInst &MI) const {
-  if (Subtarget.isThumb())
-    MI.setOpcode(ARM::tTRAP);
-  else if (Subtarget.useNaClTrap())
-    MI.setOpcode(ARM::TRAPNaCl);
-  else
-    MI.setOpcode(ARM::TRAP);
-}
-
 bool ARMBaseInstrInfo::hasNOP() const {
-  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
 }
 
 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {