From: Evan Cheng Date: Wed, 29 Jul 2009 02:18:14 +0000 (+0000) Subject: Optimize Thumb2 jumptable to use tbb / tbh when all the offsets fit in byte / halfword. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5657c01949dca6c012ac60d242d1a8d2ffdf5603;p=oota-llvm.git Optimize Thumb2 jumptable to use tbb / tbh when all the offsets fit in byte / halfword. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77422 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 24655212786..aad5eb70923 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Compiler.h" @@ -35,6 +36,7 @@ STATISTIC(NumCPEs, "Number of constpool entries"); STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); +STATISTIC(NumTBs, "Number of table branches generated"); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -122,6 +124,9 @@ namespace { /// SmallVector PushPopMIs; + /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions. + SmallVector T2JumpTables; + /// HasFarJump - True if any far jump instruction has been emitted during /// the branch fix up pass. bool HasFarJump; @@ -135,17 +140,17 @@ namespace { static char ID; ARMConstantIslands() : MachineFunctionPass(&ID) {} - virtual bool runOnMachineFunction(MachineFunction &Fn); + virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "ARM constant island placement and branch shortening pass"; } private: - void DoInitialPlacement(MachineFunction &Fn, + void DoInitialPlacement(MachineFunction &MF, std::vector &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - void InitialFunctionScan(MachineFunction &Fn, + void InitialFunctionScan(MachineFunction &MF, const std::vector &CPEMIs); MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); @@ -158,7 +163,7 @@ namespace { std::vector::iterator IP); void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock** NewMBB); - bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex); + bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); void RemoveDeadCPEMI(MachineInstr *CPEMI); bool RemoveUnusedCPEntries(); bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, @@ -169,27 +174,28 @@ namespace { bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, unsigned Disp, bool NegativeOK, bool IsSoImm = false); bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br); - bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br); - bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br); + bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br); + bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br); + bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br); bool UndoLRSpillRestore(); + bool 
OptimizeThumb2JumpTables(MachineFunction &MF); unsigned GetOffsetOf(MachineInstr *MI) const; void dumpBBs(); - void verify(MachineFunction &Fn); + void verify(MachineFunction &MF); }; char ARMConstantIslands::ID = 0; } /// verify - check BBOffsets, BBSizes, alignment of islands -void ARMConstantIslands::verify(MachineFunction &Fn) { +void ARMConstantIslands::verify(MachineFunction &MF) { assert(BBOffsets.size() == BBSizes.size()); for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); if (!isThumb) return; #ifndef NDEBUG - for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; if (!MBB->empty() && @@ -216,11 +222,11 @@ FunctionPass *llvm::createARMConstantIslandPass() { return new ARMConstantIslands(); } -bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { - MachineConstantPool &MCP = *Fn.getConstantPool(); +bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { + MachineConstantPool &MCP = *MF.getConstantPool(); - TII = Fn.getTarget().getInstrInfo(); - AFI = Fn.getInfo(); + TII = MF.getTarget().getInstrInfo(); + AFI = MF.getInfo(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); isThumb2 = AFI->isThumb2Function(); @@ -229,7 +235,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. - Fn.RenumberBlocks(); + MF.RenumberBlocks(); // Thumb1 functions containing constant pools get 2-byte alignment. // This is so we can keep exact track of where the alignment padding goes. @@ -242,7 +248,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { // we put them all at the end of the function. std::vector CPEMIs; if (!MCP.isEmpty()) { - DoInitialPlacement(Fn, CPEMIs); + DoInitialPlacement(MF, CPEMIs); if (isThumb1) AFI->setAlign(2U); } @@ -253,7 +259,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(Fn, CPEMIs); + InitialFunctionScan(MF, CPEMIs); CPEMIs.clear(); /// Remove dead constant pool entries. @@ -265,10 +271,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { while (true) { bool Change = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - Change |= HandleConstantPoolUser(Fn, i); + Change |= HandleConstantPoolUser(MF, i); DEBUG(dumpBBs()); for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - Change |= FixUpImmediateBr(Fn, ImmBranches[i]); + Change |= FixUpImmediateBr(MF, ImmBranches[i]); DEBUG(dumpBBs()); if (!Change) break; @@ -276,13 +282,16 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { } // After a while, this might be made debug-only, but it is not expensive. - verify(Fn); + verify(MF); // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. // Undo the spill / restore of LR if possible. - if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb) + if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); + // Let's see if we can use tbb / tbh to do jump tables. 
+ MadeChange |= OptimizeThumb2JumpTables(MF); + BBSizes.clear(); BBOffsets.clear(); WaterList.clear(); @@ -290,24 +299,25 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { CPEntries.clear(); ImmBranches.clear(); PushPopMIs.clear(); + T2JumpTables.clear(); return MadeChange; } /// DoInitialPlacement - Perform the initial placement of the constant pool /// entries. To start with, we put them all at the end of the function. -void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn, +void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, std::vector &CPEMIs) { // Create the basic block to hold the CPE's. - MachineBasicBlock *BB = Fn.CreateMachineBasicBlock(); - Fn.push_back(BB); + MachineBasicBlock *BB = MF.CreateMachineBasicBlock(); + MF.push_back(BB); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. const std::vector &CPs = - Fn.getConstantPool()->getConstants(); + MF.getConstantPool()->getConstants(); - const TargetData &TD = *Fn.getTarget().getTargetData(); + const TargetData &TD = *MF.getTarget().getTargetData(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); // Verify that all constant pool entries are a multiple of 4 bytes. If not, @@ -363,10 +373,10 @@ ARMConstantIslands::CPEntry /// InitialFunctionScan - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. -void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, +void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, const std::vector &CPEMIs) { unsigned Offset = 0; - for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -388,15 +398,19 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, unsigned Scale = 1; int UOpc = Opc; switch (Opc) { + default: + continue; // Ignore other JT branches case ARM::tBR_JTr: // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. AFI->setAlign(2U); if ((Offset+MBBSize)%4 != 0) + // FIXME: Add a pseudo ALIGN instruction instead. MBBSize += 2; // padding continue; // Does not get an entry in ImmBranches - default: - continue; // Ignore other JT branches + case ARM::t2BR_JT: + T2JumpTables.push_back(I); + continue; // Does not get an entry in ImmBranches case ARM::Bcc: isCond = true; UOpc = ARM::B; @@ -1041,7 +1055,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. -bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, +bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; @@ -1074,8 +1088,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, } // Okay, we know we can put an island before NewMBB now, do it! 
- MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock(); - Fn.insert(NewMBB, NewIsland); + MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MF.insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. UpdateForInsertedWaterBlock(NewIsland); @@ -1181,7 +1195,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, /// FixUpImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { +bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1190,8 +1204,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { return false; if (!Br.isCond) - return FixUpUnconditionalBr(Fn, Br); - return FixUpConditionalBr(Fn, Br); + return FixUpUnconditionalBr(MF, Br); + return FixUpConditionalBr(MF, Br); } /// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is @@ -1199,7 +1213,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { +ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); assert(isThumb && !isThumb2 && "Expected a Thumb1 function!"); @@ -1221,7 +1235,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. bool -ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) { +ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1320,3 +1334,95 @@ bool ARMConstantIslands::UndoLRSpillRestore() { } return MadeChange; } + +bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { + bool MadeChange = false; + + // FIXME: After the tables are shrunk, can we get rid some of the + // constantpool tables? + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + const std::vector &JT = MJTI->getJumpTables(); + for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { + MachineInstr *MI = T2JumpTables[i]; + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 
3 : 2); + MachineOperand JTOP = MI->getOperand(JTOpIdx); + unsigned JTI = JTOP.getIndex(); + assert(JTI < JT.size()); + + bool ByteOk = true; + bool HalfWordOk = true; + unsigned JTOffset = GetOffsetOf(MI) + 4; + const std::vector &JTBBs = JT[JTI].MBBs; + for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { + MachineBasicBlock *MBB = JTBBs[j]; + unsigned DstOffset = BBOffsets[MBB->getNumber()]; + if (ByteOk && !OffsetIsInRange(JTOffset, DstOffset, (1<<8)-1, true, false)) + ByteOk = false; + if (HalfWordOk && + !OffsetIsInRange(JTOffset, DstOffset, (1<<16)-1, true, false)) + HalfWordOk = false; + if (!ByteOk && !HalfWordOk) + break; + } + + if (ByteOk || HalfWordOk) { + MachineBasicBlock *MBB = MI->getParent(); + unsigned BaseReg = MI->getOperand(0).getReg(); + bool BaseRegKill = MI->getOperand(0).isKill(); + if (!BaseRegKill) + continue; + unsigned IdxReg = MI->getOperand(1).getReg(); + bool IdxRegKill = MI->getOperand(1).isKill(); + MachineBasicBlock::iterator PrevI = MI; + if (PrevI == MBB->begin()) + continue; + + MachineInstr *AddrMI = --PrevI; + bool OptOk = true; + // Examine the instruction that calculate the jumptable entry address. + // If it's not the one just before the t2BR_JT, we won't delete it, then + // it's not worth doing the optimization. + for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { + const MachineOperand &MO = AddrMI->getOperand(k); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() != BaseReg) { + OptOk = false; + break; + } + if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) { + OptOk = false; + break; + } + } + if (!OptOk) + continue; + + // The previous instruction should be a t2LEApcrelJT, we want to delete + // it as well. + MachineInstr *LeaMI = --PrevI; + if (LeaMI->getOpcode() != ARM::t2LEApcrelJT || + LeaMI->getOperand(0).getReg() != BaseReg) + LeaMI = 0; + + if (OptOk) { + unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH; + AddDefaultPred(BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) + .addReg(IdxReg, getKillRegState(IdxRegKill)) + .addJumpTableIndex(JTI, JTOP.getTargetFlags()) + .addImm(MI->getOperand(JTOpIdx+1).getImm())); + + AddrMI->eraseFromParent(); + if (LeaMI) + LeaMI->eraseFromParent(); + MI->eraseFromParent(); + ++NumTBs; + MadeChange = true; + } + } + } + + return MadeChange; +} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 026018c7d73..36de793c53b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -409,6 +409,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::tCALL: return "ARMISD::tCALL"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; @@ -1718,7 +1719,8 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { // which does another jump to the destination. This also makes it easier // to translate it to TBB / TBH later. // FIXME: This might not work if the function is extremely large. 
- return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, + Addr, Op.getOperand(2), JTI, UId); } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((MVT)MVT::i32, dl, Chain, Addr, NULL, 0); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 42a19d45a6f..d0806fb9c1d 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -40,6 +40,7 @@ namespace llvm { tCALL, // Thumb function call. BRCOND, // Conditional branch. BR_JT, // Jumptable branch. + BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). RET_FLAG, // Return with a flag operand. PIC_ADD, // Add with a PC operand and a PIC label. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ab4ccd7d75f..f0101eaf508 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -34,6 +34,10 @@ def SDT_ARMBrJT : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_ARMBr2JT : SDTypeProfile<0, 4, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, @@ -71,6 +75,8 @@ def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, [SDNPHasChain]>; +def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, + [SDNPHasChain]>; def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index a77d93191d3..9ef3658d2aa 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -21,6 +21,11 @@ def it_mask : Operand { let PrintMethod = "printThumbITMask"; } +// Table branch address +def tb_addrmode : Operand { + let PrintMethod = "printTBAddrMode"; +} + // Shifted operands. No register controlled shifts for Thumb2. // Note: We do not support rrx shifted operands yet. 
def t2_so_reg : Operand, // reg imm @@ -1048,11 +1053,24 @@ def t2B : T2XI<(outs), (ins brtarget:$target), "b.w $target", [(br bb:$target)]>; -let isNotDuplicable = 1, isIndirectBranch = 1 in +let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : - T2JTI<(outs), (ins GPR:$target, jt2block_operand:$jt, i32imm:$id), + T2JTI<(outs), + (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), "mov pc, $target\n$jt", - [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; + [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; + +def t2TBB : + T2I<(outs), + (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), + "tbb", " $index\n$jt", []>; + +def t2TBH : + T2I<(outs), + (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), + "tbh", " $index\n$jt", []>; +} // isNotDuplicable, isIndirectBranch + } // isBranch, isTerminator, isBarrier // FIXME: should be able to write a pattern for ARMBrcond, but can't use diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index c7f93831add..f82cee35aff 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -162,6 +162,7 @@ namespace { const char *Modifier); void printJTBlockOperand(const MachineInstr *MI, int OpNum); void printJT2BlockOperand(const MachineInstr *MI, int OpNum); + void printTBAddrMode(const MachineInstr *MI, int OpNum); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); @@ -964,15 +965,39 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) { const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector &JT = MJTI->getJumpTables(); const std::vector &JTBBs = JT[JTI].MBBs; + bool ByteOffset = false, HalfWordOffset = false; + if (MI->getOpcode() == ARM::t2TBB) + ByteOffset = true; + else if (MI->getOpcode() == ARM::t2TBH) + HalfWordOffset = true; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; - O << "\tb.w "; - printBasicBlockLabel(MBB, false, false, false); + if (ByteOffset) + O << TAI->getData8bitsDirective(); + else if (HalfWordOffset) + O << TAI->getData16bitsDirective(); + if (ByteOffset || HalfWordOffset) { + O << '('; + printBasicBlockLabel(MBB, false, false, false); + O << "-" << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() << ")/2"; + } else { + O << "\tb.w "; + printBasicBlockLabel(MBB, false, false, false); + } if (i != e-1) O << '\n'; } } +void ARMAsmPrinter::printTBAddrMode(const MachineInstr *MI, int OpNum) { + O << "[pc, " << TRI->getAsmName(MI->getOperand(OpNum).getReg()); + if (MI->getOpcode() == ARM::t2TBH) + O << ", lsl #1"; + O << ']'; +} + bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode){ diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index b3e5d7ea72b..9a757b04ccc 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -40,6 +40,8 @@ Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { case ARM::t2LDM_RET: case ARM::t2B: // Uncond branch. case ARM::t2BR_JT: // Jumptable branch. + case ARM::t2TBB: // Table branch byte. + case ARM::t2TBH: // Table branch halfword. case ARM::tBR_JTr: // Jumptable branch (16-bit version). 
case ARM::tBX_RET: case ARM::tBX_RET_vararg: diff --git a/test/CodeGen/Thumb2/thumb2-jtbl.ll b/test/CodeGen/Thumb2/thumb2-jtbl.ll deleted file mode 100644 index f84618e7ae8..00000000000 --- a/test/CodeGen/Thumb2/thumb2-jtbl.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin | FileCheck %s -; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s - -define void @bar(i32 %n.u) { -entry: -; CHECK: bar: -; CHECK: mov pc -; CHECK: b.w LBB1_2 - - switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ] -bb: - tail call void(...)* @foo1() - ret void -bb1: - tail call void(...)* @foo2() - ret void -bb2: - tail call void(...)* @foo6() - ret void -bb3: - tail call void(...)* @foo3() - ret void -bb4: - tail call void(...)* @foo4() - ret void -bb5: - tail call void(...)* @foo5() - ret void -bb6: - tail call void(...)* @foo1() - ret void -bb7: - tail call void(...)* @foo2() - ret void -bb8: - tail call void(...)* @foo6() - ret void -bb9: - tail call void(...)* @foo3() - ret void -bb10: - tail call void(...)* @foo4() - ret void -bb11: - tail call void(...)* @foo5() - ret void -bb12: - tail call void(...)* @foo6() - ret void -} - -declare void @foo1(...) -declare void @foo2(...) -declare void @foo6(...) -declare void @foo3(...) -declare void @foo4(...) -declare void @foo5(...) diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll new file mode 100644 index 00000000000..c0347c78479 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-tbb.ll @@ -0,0 +1,56 @@ +; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin | FileCheck %s +; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s + +define void @bar(i32 %n.u) { +entry: +; CHECK: bar: +; CHECK: tbb + + switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ] +bb: + tail call void(...)* @foo1() + ret void +bb1: + tail call void(...)* @foo2() + ret void +bb2: + tail call void(...)* @foo6() + ret void +bb3: + tail call void(...)* @foo3() + ret void +bb4: + tail call void(...)* @foo4() + ret void +bb5: + tail call void(...)* @foo5() + ret void +bb6: + tail call void(...)* @foo1() + ret void +bb7: + tail call void(...)* @foo2() + ret void +bb8: + tail call void(...)* @foo6() + ret void +bb9: + tail call void(...)* @foo3() + ret void +bb10: + tail call void(...)* @foo4() + ret void +bb11: + tail call void(...)* @foo5() + ret void +bb12: + tail call void(...)* @foo6() + ret void +} + +declare void @foo1(...) +declare void @foo2(...) +declare void @foo6(...) +declare void @foo3(...) +declare void @foo4(...) +declare void @foo5(...) 
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll new file mode 100644 index 00000000000..d15bf58337c --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-tbh.ll @@ -0,0 +1,86 @@ +; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s + + %struct.R_flstr = type { i32, i32, i8* } + %struct._T_tstr = type { i32, %struct.R_flstr*, %struct._T_tstr* } +@_C_nextcmd = external global i32 ; [#uses=3] +@.str31 = external constant [28 x i8], align 1 ; <[28 x i8]*> [#uses=1] +@_T_gtol = external global %struct._T_tstr* ; <%struct._T_tstr**> [#uses=2] + +declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly + +declare arm_apcscc void @Z_fatal(i8*) noreturn nounwind + +declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind + +define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +; CHECK: main: +; CHECK: tbh +entry: + br label %bb42.i + +bb1.i2: ; preds = %bb42.i + br label %bb40.i + +bb5.i: ; preds = %bb42.i + %0 = or i32 %_Y_flags.1, 32 ; [#uses=1] + br label %bb40.i + +bb7.i: ; preds = %bb42.i + call arm_apcscc void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 0, i8* null) nounwind + unreachable + +bb15.i: ; preds = %bb42.i + call arm_apcscc void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 2, i8* null) nounwind + unreachable + +bb23.i: ; preds = %bb42.i + %1 = call arm_apcscc i32 @strlen(i8* null) nounwind readonly ; [#uses=0] + unreachable + +bb33.i: ; preds = %bb42.i + store i32 0, i32* @_C_nextcmd, align 4 + %2 = call arm_apcscc noalias i8* @calloc(i32 21, i32 1) nounwind ; [#uses=0] + unreachable + +bb34.i: ; preds = %bb42.i + %3 = load i32* @_C_nextcmd, align 4 ; [#uses=1] + %4 = add i32 %3, 1 ; [#uses=1] + store i32 %4, i32* @_C_nextcmd, align 4 + %5 = call arm_apcscc noalias i8* @calloc(i32 22, i32 1) nounwind ; [#uses=0] + unreachable + +bb35.i: ; preds = %bb42.i + %6 = call arm_apcscc noalias i8* @calloc(i32 20, i32 1) nounwind ; [#uses=0] + unreachable + +bb37.i: ; preds = %bb42.i + %7 = call arm_apcscc noalias i8* @calloc(i32 14, i32 1) nounwind ; [#uses=0] + unreachable + +bb39.i: ; preds = %bb42.i + call arm_apcscc void @Z_fatal(i8* getelementptr ([28 x i8]* @.str31, i32 0, i32 0)) nounwind + unreachable + +bb40.i: ; preds = %bb42.i, %bb5.i, %bb1.i2 + %_Y_flags.0 = phi i32 [ 0, %bb1.i2 ], [ %0, %bb5.i ], [ %_Y_flags.1, %bb42.i ] ; [#uses=1] + %_Y_eflag.b.0 = phi i1 [ %_Y_eflag.b.1, %bb1.i2 ], [ %_Y_eflag.b.1, %bb5.i ], [ true, %bb42.i ] ; [#uses=1] + br label %bb42.i + +bb42.i: ; preds = %bb40.i, %entry + %_Y_eflag.b.1 = phi i1 [ false, %entry ], [ %_Y_eflag.b.0, %bb40.i ] ; [#uses=2] + %_Y_flags.1 = phi i32 [ 0, %entry ], [ %_Y_flags.0, %bb40.i ] ; [#uses=2] + switch i32 undef, label %bb39.i [ + i32 67, label %bb33.i + i32 70, label %bb35.i + i32 77, label %bb37.i + i32 83, label %bb34.i + i32 97, label %bb7.i + i32 100, label %bb5.i + i32 101, label %bb40.i + i32 102, label %bb23.i + i32 105, label %bb15.i + i32 116, label %bb1.i2 + ] +} + +declare arm_apcscc void @_T_addtol(%struct._T_tstr** nocapture, i32, i8*) nounwind
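
For reference, a minimal standalone C++ sketch of the fit test that the new OptimizeThumb2JumpTables pass performs before rewriting a t2BR_JT into t2TBB or t2TBH: every destination block must be reachable with a byte-sized table entry for tbb, or a halfword-sized entry for tbh. It mirrors the (1<<8)-1 and (1<<16)-1 bounds used above but abstracts away the pass's OffsetIsInRange and alignment handling; the names JumpTableKind and classifyJumpTable are illustrative only, not part of this patch.

#include <cstdint>
#include <vector>

enum class JumpTableKind { FullBranchTable, TBB, TBH };

// JTOffset: offset of the table branch within the function, plus 4 for the
// PC bias (the pass uses GetOffsetOf(MI) + 4).  DestOffsets: offsets of the
// jump table's destination basic blocks.
JumpTableKind classifyJumpTable(uint32_t JTOffset,
                                const std::vector<uint32_t> &DestOffsets) {
  bool ByteOk = true, HalfWordOk = true;
  for (uint32_t Dst : DestOffsets) {
    uint32_t Disp = Dst >= JTOffset ? Dst - JTOffset : JTOffset - Dst;
    if (Disp > (1u << 8) - 1)    // too far for a byte entry (tbb)
      ByteOk = false;
    if (Disp > (1u << 16) - 1)   // too far for a halfword entry (tbh)
      HalfWordOk = false;
    if (!ByteOk && !HalfWordOk)  // neither compact form fits
      break;
  }
  if (ByteOk)
    return JumpTableKind::TBB;
  if (HalfWordOk)
    return JumpTableKind::TBH;
  return JumpTableKind::FullBranchTable;  // keep the b.w branch table
}

When a table qualifies, the pass builds ARM::t2TBB (byte table) or ARM::t2TBH (halfword table) with the kill state of the index register, and the asm printer then emits each entry as (LBB_n - LJTI...)/2, since tbb/tbh offsets are stored in units of two bytes.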