1 //===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that fixes AArch64 branches which have ended up out
11 // of range for their immediate operands.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "aarch64-branch-fixup"
17 #include "AArch64InstrInfo.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Format.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/ADT/Statistic.h"
// Pass statistics declared through the STATISTIC macro; the string is the
// human-readable description attached to each counter.
// NOTE(review): no increment of either counter is visible in this listing --
// confirm that they are still updated in the full file.
28 STATISTIC(NumSplit, "Number of uncond branches inserted");
29 STATISTIC(NumCBrFixed, "Number of cond branches fixed");
/// Return the worst case padding that could result from unknown offset bits.
/// This does not include alignment padding caused by known offset bits.
///
/// When at least LogAlign low bits of the offset are already known, the
/// alignment cannot introduce any unknown padding and the result is 0.
///
/// @param LogAlign log2(alignment)
/// @param KnownBits Number of known low offset bits.
/// @return (1 << LogAlign) - (1 << KnownBits) when KnownBits < LogAlign,
///         otherwise 0.
static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  // Every bit that the alignment constrains is already known.
  return 0;
}
// NOTE(review): the line numbers embedded in this listing skip values (for
// example 45 -> 50 and 53 -> 56), so some original lines -- including member
// declarations and closing braces -- are not visible here. The comments added
// below describe only what the visible lines establish.
43 /// Due to limited PC-relative displacements, conditional branches to distant
44 /// blocks may need converting into an unconditional equivalent. For example:
45 /// tbz w1, #0, far_away
/// (The remainder of this example is not visible in this listing. The rewrite
/// is described at fixupConditionalBr as an inverse conditional branch plus an
/// unconditional branch to the destination.)
50 class AArch64BranchFixup : public MachineFunctionPass {
51 /// Information about the offset and size of a single basic block.
52 struct BasicBlockInfo {
53 /// Distance from the beginning of the function to the beginning of this
/// block. (Sentence completed by the reviewer; presumably this documents the
/// Offset member that the constructor below initialises -- confirm.)
56 /// Offsets are computed assuming worst case padding before an aligned
57 /// block. This means that subtracting basic block offsets always gives a
58 /// conservative estimate of the real distance which may be smaller.
60 /// Because worst case padding is used, the computed offset of an aligned
61 /// block may not actually be aligned.
64 /// Size of the basic block in bytes. If the block contains inline
65 /// assembly, this is a worst case estimate.
67 /// The size does not include any alignment padding whether from the
68 /// beginning of the block, or from an aligned jump table at the end.
71 /// The number of low bits in Offset that are known to be exact. The
72 /// remaining bits of Offset are an upper bound.
75 /// When non-zero, the block contains instructions (inline asm) of unknown
76 /// size. The real size may be smaller than Size bytes by a multiple of 1
/// NOTE(review): the sentence above is cut off in this listing. Since
/// internalKnownBits() uses Unalign as a bit count, it presumably continues
/// "<< Unalign bytes" -- confirm against the full file.

/// Default state: zero offset, zero size, no known offset bits and no
/// unknown-size instructions.
80 BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
82 /// Compute the number of known offset bits internally to this block.
83 /// This number should be used to predict worst case padding when
84 /// splitting the block.
85 unsigned internalKnownBits() const {
// A non-zero Unalign takes precedence over KnownBits.
86 unsigned Bits = Unalign ? Unalign : KnownBits;
87 // If the block size isn't a multiple of the known bits, assume the
88 // worst case padding.
89 if (Size & ((1u << Bits) - 1))
// Fall back to the number of trailing zero bits in Size.
90 Bits = CountTrailingZeros_32(Size);
94 /// Compute the offset immediately following this block. If LogAlign is
95 /// specified, return the offset the successor block will get if it has
/// NOTE(review): sentence truncated in this listing; presumably it ends
/// "this alignment" -- confirm.
97 unsigned postOffset(unsigned LogAlign = 0) const {
98 unsigned PO = Offset + Size;
101 // Add alignment padding from the terminator.
102 return PO + UnknownPadding(LogAlign, internalKnownBits());
105 /// Compute the number of known low bits of postOffset. If this block
106 /// contains inline asm, the number of known bits drops to the
107 /// instruction alignment. An aligned terminator may increase the number
109 /// If LogAlign is given, also consider the alignment of the next block.
110 unsigned postKnownBits(unsigned LogAlign = 0) const {
// The larger of the requested alignment and this block's internal known bits.
111 return std::max(LogAlign, internalKnownBits());
/// Per-block layout information; the rest of this file indexes it by basic
/// block number (BBInfo[MBB->getNumber()]).
115 std::vector<BasicBlockInfo> BBInfo;
117 /// One per immediate branch, keeping the machine instruction pointer,
118 /// conditional or unconditional, the max displacement, and (if IsCond is
119 /// true) the corresponding inverted branch opcode.
// NOTE(review): the constructor below stores only MI, OffsetBits and IsCond.
// No inverted opcode is stored in what is visible here; fixupConditionalBr
// derives it with a switch on the opcode. The sentence above looks stale.
122 unsigned OffsetBits : 31;
124 ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
125 : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
128 /// Keep track of all the immediate branch instructions.
130 std::vector<ImmBranch> ImmBranches;
/// Target instruction info; assigned at the start of runOnMachineFunction and
/// used for instruction sizes and opcode descriptors.
133 const AArch64InstrInfo *TII;
/// Constructs the pass, forwarding ID to the MachineFunctionPass base.
136 AArch64BranchFixup() : MachineFunctionPass(ID) {}
/// Entry point of the pass for one machine function.
138 virtual bool runOnMachineFunction(MachineFunction &MF);
/// Fixed, human-readable name of this pass.
140 virtual const char *getPassName() const {
141 return "AArch64 branch fixup pass";
// Helpers defined out of line further down in this file.
145 void initializeFunctionInfo();
146 MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
147 void adjustBBOffsetsAfter(MachineBasicBlock *BB);
148 bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
149 unsigned OffsetBits);
150 bool fixupImmediateBr(ImmBranch &Br);
151 bool fixupConditionalBr(ImmBranch &Br);
153 void computeBlockSize(MachineBasicBlock *MBB);
154 unsigned getOffsetOf(MachineInstr *MI) const;
/// Out-of-line definition of the pass's ID member, which the constructor
/// forwards to MachineFunctionPass.
158 char AArch64BranchFixup::ID = 0;
/// Consistency check over BBInfo: walks the blocks of MF and asserts, for
/// every block whose number is non-zero, that the previous block's
/// postOffset() does not exceed this block's recorded Offset.
// NOTE(review): the loop's condition/increment and the closing lines are not
// visible in this listing.
162 void AArch64BranchFixup::verify() {
164 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
166 MachineBasicBlock *MBB = MBBI;
167 unsigned MBBId = MBB->getNumber();
// The !MBBId test short-circuits for block number 0, so BBInfo[MBBId - 1] is
// never evaluated with MBBId == 0.
168 assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
173 /// Print block size and offset information; used for debugging.
///
/// Writes one line per BBInfo entry to dbgs(): the offset in hex, the entry's
/// index, "kb" (KnownBits), "ua" (Unalign) and the size in hex.
174 void AArch64BranchFixup::dumpBBs() {
176 for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
177 const BasicBlockInfo &BBI = BBInfo[J];
178 dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
179 << " kb=" << unsigned(BBI.KnownBits)
180 << " ua=" << unsigned(BBI.Unalign)
// BBInfo[J].Size is the same entry that BBI refers to.
181 << format(" size=%#x\n", BBInfo[J].Size);
186 /// Returns an instance of the branch fixup pass.
///
/// The object is allocated with `new` and returned as a FunctionPass pointer.
/// NOTE(review): the caller presumably takes ownership -- confirm.
187 FunctionPass *llvm::createAArch64BranchFixupPass() {
188 return new AArch64BranchFixup();
/// Pass entry point. Renumbers the blocks, gathers per-block size/offset data
/// and the list of immediate branches, then repeatedly calls fixupImmediateBr
/// on every recorded branch. Calls report_fatal_error once more than 30
/// sweeps have made a change.
// NOTE(review): the parameter is named `mf` but the body uses `MF`; the
// assignment that connects them, the loop header around the iteration and the
// return statement are not visible in this listing.
191 bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
193 DEBUG(dbgs() << "***** AArch64BranchFixup ******");
194 TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
196 // This pass invalidates liveness information when it splits basic blocks.
197 MF->getRegInfo().invalidateLiveness();
199 // Renumber all of the machine basic blocks in the function, guaranteeing that
200 // the numbers agree with the position of the block in the function.
201 MF->RenumberBlocks();
203 // Do the initial scan of the function, building up information about the
204 // sizes of each block and location of each immediate branch.
205 initializeFunctionInfo();
207 // Iteratively fix up branches until there is no change.
208 unsigned NoBRIters = 0;
209 bool MadeChange = false;
211 DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
212 bool BRChange = false;
// The size is sampled once per sweep, so branches that fixupConditionalBr
// appends to ImmBranches during this sweep are first examined on the next one.
213 for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
214 BRChange |= fixupImmediateBr(ImmBranches[i]);
// NoBRIters only advances on sweeps that changed something.
215 if (BRChange && ++NoBRIters > 30)
216 report_fatal_error("Branch Fix Up pass failed to converge!");
224 // After a while, this might be made debug-only, but it is not expensive.
// NOTE(review): the statement this comment refers to is not visible here.
227 DEBUG(dbgs() << '\n'; dumpBBs());
235 /// Return true if the specified basic block can fallthrough into the block
236 /// immediately after it.
// NOTE(review): the return statements and the body of the successor loop are
// not visible in this listing. The visible lines establish only that the
// layout successor is compared against the function's end and that MBB's CFG
// successors are iterated.
237 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
238 // Get the next machine basic block in the function.
239 MachineFunction::iterator MBBI = MBB;
240 // Can't fall off end of function.
241 if (llvm::next(MBBI) == MBB->getParent()->end())
// NextBB is the block laid out immediately after MBB.
244 MachineBasicBlock *NextBB = llvm::next(MBBI);
245 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
246 E = MBB->succ_end(); I != E; ++I)
253 /// Do the initial scan of the function, building up information about the sizes
254 /// of each block, and each immediate branch.
///
/// Fills BBInfo (one entry per block ID) and appends an ImmBranch to
/// ImmBranches for each recorded branch instruction.
// NOTE(review): several lines of this function are missing from the listing,
// including the body of the first loop, the start of the opcode switch and
// the code that sets Bits and IsCond.
255 void AArch64BranchFixup::initializeFunctionInfo() {
257 BBInfo.resize(MF->getNumBlockIDs());
259 // First thing, compute the size of all basic blocks, and see if the function
260 // has any inline assembly in it. If so, we have to be conservative about
261 // alignment assumptions, as we don't know for sure the size of any
262 // instructions in the inline assembly.
263 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
266 // The known bits of the entry block offset are determined by the function
// NOTE(review): KnownBits is a bit count, so MF->getAlignment() is presumably
// a log2 value here -- confirm.
268 BBInfo.front().KnownBits = MF->getAlignment();
270 // Compute block offsets and known bits.
271 adjustBBOffsetsAfter(MF->begin());
273 // Now go back through the instructions and build up our data structures.
274 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
276 MachineBasicBlock &MBB = *MBBI;
278 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
// Debug-value pseudo instructions get separate handling (the statement that
// follows this test is not visible).
280 if (I->isDebugValue())
283 int Opc = I->getOpcode();
287 // The offsets encoded in instructions here scale by the instruction
288 // size (4 bytes), effectively increasing their range by 2 bits.
292 continue; // Ignore other JT branches
293 case AArch64::TBZxii:
294 case AArch64::TBZwii:
295 case AArch64::TBNZxii:
296 case AArch64::TBNZwii:
313 // Record this immediate branch.
314 ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
320 /// Compute the size and some alignment information for MBB. This function
321 /// updates BBInfo directly.
///
/// @param MBB Block to measure; its number selects the BBInfo entry to update.
// NOTE(review): any reset of the entry before the loop and the statement
// guarded by the inline-asm test are not visible in this listing.
322 void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
323 BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
327 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
// Accumulate the byte size reported for each instruction.
329 BBI.Size += TII->getInstSizeInBytes(*I);
330 // For inline asm, GetInstSizeInBytes returns a conservative estimate.
331 // The actual size may be smaller, but still a multiple of the instr size.
332 if (I->isInlineAsm())
337 /// Return the current offset of the specified machine instruction from the
338 /// start of the function. This offset changes as stuff is moved around inside
/// the function. (Sentence completed by the reviewer; the original continuation
/// is not visible in this listing.)
///
/// @param MI Instruction to locate; it must be in the block that
///           MI->getParent() returns.
340 unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
341 MachineBasicBlock *MBB = MI->getParent();
343 // The offset is composed of two things: the sum of the sizes of all MBB's
344 // before this instruction's block, and the offset from the start of the block
// Start from the recorded offset of the containing block.
346 unsigned Offset = BBInfo[MBB->getNumber()].Offset;
348 // Sum instructions before MI in MBB.
// NOTE(review): the loop condition dereferences I (&*I) before the assert in
// the body compares I with MBB->end(), so the assert cannot catch a walk that
// has already reached end().
349 for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
350 assert(I != MBB->end() && "Didn't find MI in its own basic block?");
351 Offset += TII->getInstSizeInBytes(*I);
356 /// Split the basic block containing MI into two blocks, which are joined by
357 /// an unconditional branch. Update data structures and renumber blocks to
358 /// account for this change and returns the newly created block.
///
/// @param MI First instruction to move into the new block.
// NOTE(review): the return-type line and the final return statement are not
// visible in this listing.
360 AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
361 MachineBasicBlock *OrigBB = MI->getParent();
363 // Create a new MBB for the code after the OrigBB.
364 MachineBasicBlock *NewBB =
365 MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
// Place NewBB directly after OrigBB in the function layout.
366 MachineFunction::iterator MBBI = OrigBB; ++MBBI;
367 MF->insert(MBBI, NewBB);
369 // Splice the instructions starting with MI over to NewBB.
370 NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
372 // Add an unconditional branch from OrigBB to NewBB.
373 // Note the new unconditional branch is not being recorded.
374 // There doesn't seem to be meaningful DebugInfo available; this doesn't
375 // correspond to anything in the source.
376 BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
379 // Update the CFG. All succs of OrigBB are now succs of NewBB.
380 NewBB->transferSuccessors(OrigBB);
382 // OrigBB branches to NewBB.
383 OrigBB->addSuccessor(NewBB);
385 // Update internal data structures to account for the newly inserted MBB.
386 MF->RenumberBlocks(NewBB);
388 // Insert an entry into BBInfo to align it properly with the (newly
389 // renumbered) block numbers.
390 BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
392 // Figure out how large the OrigBB is. As the first half of the original
393 // block, it cannot contain a tablejump. The size includes
394 // the new jump we added. (It should be possible to do this without
395 // recounting everything, but it's very confusing, and this is rarely
// (The end of the sentence above is not visible in this listing.)
397 computeBlockSize(OrigBB);
399 // Figure out how large the NewMBB is. As the second half of the original
400 // block, it may contain a tablejump.
401 computeBlockSize(NewBB);
403 // All BBOffsets following these blocks must be modified.
404 adjustBBOffsetsAfter(OrigBB);
/// Recompute Offset and KnownBits for the blocks numbered after BB, deriving
/// each block's values from its layout predecessor's postOffset() and
/// postKnownBits().
///
/// @param BB Block whose successors in numbering order are to be updated; BB's
///           own entry is read but not written by the visible code.
// NOTE(review): the first line of the early-exit condition and the statement
// it guards are not visible in this listing.
409 void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
410 unsigned BBNum = BB->getNumber();
411 for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
412 // Get the offset and known bits at the end of the layout predecessor.
413 // Include the alignment of the current block.
// The block's getAlignment() value is used directly as a log2 alignment.
414 unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
415 unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
416 unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
418 // This is where block i begins. Stop if the offset is already correct,
419 // and we have updated 2 blocks. This is the maximum number of blocks
420 // changed before calling this function.
422 BBInfo[i].Offset == Offset &&
423 BBInfo[i].KnownBits == KnownBits)
// Store the freshly computed values for block i.
426 BBInfo[i].Offset = Offset;
427 BBInfo[i].KnownBits = KnownBits;
431 /// Returns true if the distance between specific MI and specific BB can fit in
432 /// MI's displacement field.
///
/// @param MI Branch instruction; its offset comes from getOffsetOf(MI).
/// @param DestBB Destination block; its offset comes from BBInfo.
/// @param OffsetBits Width in bits passed to isIntN for the range test.
433 bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
434 MachineBasicBlock *DestBB,
435 unsigned OffsetBits) {
// Both offsets are widened to 64-bit signed values so the difference below can
// be negative for backward branches.
436 int64_t BrOffset = getOffsetOf(MI);
437 int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
439 DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
440 << " from BB#" << MI->getParent()->getNumber()
441 << " bits available=" << OffsetBits
442 << " from " << getOffsetOf(MI) << " to " << DestOffset
443 << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
// The displacement is measured from the branch instruction itself to the
// start of the destination block.
445 return isIntN(OffsetBits, DestOffset - BrOffset);
448 /// Fix up an immediate branch whose destination is too far away to fit in its
449 /// displacement field.
///
/// @param Br Recorded branch to examine.
/// @return The result of fixupConditionalBr(Br) when the destination is out of
///         range. NOTE(review): the return for the in-range case is not
///         visible in this listing.
450 bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
451 MachineInstr *MI = Br.MI;
452 MachineBasicBlock *DestBB = 0;
// Locate the operand that carries the destination block.
453 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
454 if (MI->getOperand(i).isMBB()) {
455 DestBB = MI->getOperand(i).getMBB();
459 assert(DestBB && "Branch with no destination BB?");
461 // Check to see if the DestBB is already in-range.
462 if (isBBInRange(MI, DestBB, Br.OffsetBits))
// Only conditional branches are expected to get this far.
465 assert(Br.IsCond && "Only conditional branches should need fixup");
466 return fixupConditionalBr(Br);
469 /// Fix up a conditional branch whose destination is too far away to fit in its
470 /// displacement field. It is converted to an inverse conditional branch + an
471 /// unconditional branch to the destination.
///
/// @param Br Recorded conditional branch. Br.MI is overwritten when the
///           instruction is rebuilt with an inverted opcode.
// NOTE(review): this listing omits a number of lines (the return type, the
// else/closing lines of several branches, parts of the illustrative comments)
// and stops before the end of the function. Comments added here cover only
// what is visible.
473 AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
474 MachineInstr *MI = Br.MI;
475 MachineBasicBlock *MBB = MI->getParent();
// Index of the operand holding the branch's destination block.
476 unsigned CondBrMBBOperand = 0;
478 // The general idea is to add an unconditional branch to the destination and
479 // invert the conditional branch to jump over it. Complications occur around
480 // fallthrough and unreachable ends to the block.
487 // First we invert the conditional branch, by creating a replacement if
488 // necessary. This if statement contains all the special handling of different
490 if (MI->getOpcode() == AArch64::Bcc) {
491 // The basic block is operand number 1 for Bcc
492 CondBrMBBOperand = 1;
// Bcc is inverted in place by rewriting its condition-code immediate.
494 A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
495 CC = A64InvertCondCode(CC);
496 MI->getOperand(0).setImm(CC);
// Other conditional branches are inverted by building a replacement
// instruction with the opposite opcode.
498 MachineInstrBuilder InvertedMI;
500 switch (MI->getOpcode()) {
501 default: llvm_unreachable("Unknown branch type");
502 case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
503 case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
504 case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
505 case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
506 case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
507 case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
508 case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
509 case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
// Build the replacement before MI, copy every operand across and remember
// which operand is the destination block.
512 InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
513 for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
514 InvertedMI.addOperand(MI->getOperand(i));
515 if (MI->getOperand(i).isMBB())
516 CondBrMBBOperand = i;
// Drop the old instruction and make both MI and the record point at the new one.
519 MI->eraseFromParent();
520 MI = Br.MI = InvertedMI;
523 // If the branch is at the end of its MBB and that has a fall-through block,
524 // direct the updated conditional branch to the fall-through
525 // block. Otherwise, split the MBB before the next instruction.
526 MachineInstr *BMI = &MBB->back();
527 bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
// Special case: MI is immediately followed by a block-ending Bimm.
531 if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
532 BMI->getOpcode() == AArch64::Bimm) {
533 // Last MI in the BB is an unconditional branch. We can swap destinations:
534 // b.eq L1 (temporarily b.ne L1 after first change)
539 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
540 if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
541 DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
// Exchange the two branch targets.
543 MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
544 BMI->getOperand(0).setMBB(DestBB);
545 MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
// Split after MI so the code that follows it lands in a new block.
552 MachineBasicBlock::iterator MBBI = MI; ++MBBI;
553 splitBlockBeforeInstr(MBBI);
554 // No need for the branch to the next block. We're adding an unconditional
555 // branch to the destination.
556 int delta = TII->getInstSizeInBytes(MBB->back());
557 BBInfo[MBB->getNumber()].Size -= delta;
558 MBB->back().eraseFromParent();
559 // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
562 // After splitting and removing the unconditional branch from the original BB,
563 // the structure is now:
567 // splitbb/fallthroughbb:
568 // [old b L2/real continuation]
570 // We now have to change the conditional branch to point to splitbb and add an
571 // unconditional branch after it to L1, giving the final structure:
574 // b.invertedCC splitbb
576 // splitbb/fallthroughbb:
577 // [old b L2/real continuation]
578 MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
580 DEBUG(dbgs() << " Insert B to BB#"
581 << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
582 << " also invert condition and change dest. to BB#"
583 << NextBB->getNumber() << "\n");
585 // Insert a new unconditional branch and fixup the destination of the
586 // conditional one. Also update the ImmBranch as well as adding a new entry
587 // for the new branch.
588 BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
589 .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
590 MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
// Account for the branch just appended to MBB.
592 BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
594 // 26 bits written down in Bimm, specifying a multiple of 4.
595 unsigned OffsetBits = 26 + 2;
// NOTE(review): Br is passed in as a reference to an ImmBranches element (see
// the caller's ImmBranches[i]); this push_back may reallocate the vector, so
// Br must not be used after this line.
596 ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
598 adjustBBOffsetsAfter(MBB);