private:
// Tail Merging.
bool TailMergeBlocks(MachineFunction &MF);
+ bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1);
+ std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials;
const MRegisterInfo *RegInfo;
RegScavenger *RS;
// Branch optzn.
return MBB1Time < MBB2Time;
}
-bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock* PredBB) {
MadeChange = false;
- if (!EnableTailMerge) return false;
-
- // Find blocks with no successors.
- std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- if (I->succ_empty())
- MergePotentials.push_back(std::make_pair(HashEndOfMBB(I), I));
- }
-
// Sort by hash value so that blocks with identical end sequences sort
// together.
std::stable_sort(MergePotentials.begin(), MergePotentials.end());
// If there is nothing that matches the hash of the current basic block,
// give up.
if (CurHash != PrevHash) {
+ if (SuccBB && CurMBB != PredBB)
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, std::vector<MachineOperand>());
MergePotentials.pop_back();
continue;
}
// If we didn't find anything that has at least two instructions matching
// this one, bail out.
if (FoundMatch == ~0U) {
+ // Put the unconditional branch back, if we need one.
+ if (SuccBB && CurMBB != PredBB)
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, std::vector<MachineOperand>());
MergePotentials.pop_back();
continue;
}
}
MadeChange = true;
}
-
return MadeChange;
}
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+ MadeChange = false;
+
+ if (!EnableTailMerge) return false;
+
+ // First find blocks with no successors.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (I->succ_empty())
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(I), I));
+ }
+ // See if we can do any crossjumping on those.
+ MadeChange |= TryMergeBlocks(NULL, NULL);
+
+ MergePotentials.clear();
+ // Look at blocks with two predecessors, where each predecessor has either:
+ // - a single successor, or
+ // - two successors, where successor I is reached either by ubr or fallthrough.
+ // The two-successor case where successor I is reached by cbr
+ // from both blocks is handled by the preceding case (when we consider the
+ // other, fallthough block).
+ // FIXME: The two-successor case where I is reached by cbr
+ // from one block, and by fallthrough/ubr from the other, is not handled yet.
+ // Beware that sometimes blocks are in the predecessor list, but can't really
+ // jump to the "successor" we're looking at. Tolerate this.
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (!I->succ_empty() && I->pred_size() >= 2) {
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end();
+ P != E2; ++P) {
+ MachineBasicBlock* PBB = *P;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ std::vector<MachineOperand> Cond;
+ // Remove the unconditional branch at the end, if any.
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond) &&
+ ((!FBB && Cond.size()==0) || // single successor
+ (!FBB && TBB!=IBB) || // cbr elsewhere, fallthrough to I
+ (FBB && FBB==IBB))) { // cbr then branch to I
+ if (TBB) {
+ TII->RemoveBranch(*PBB);
+ if (TBB!=IBB)
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, TBB, 0, Cond);
+ }
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB), *P));
+ }
+ }
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryMergeBlocks(I, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can be either an original single predecessor, or a result
+ // of removing blocks in TryMergeBlocks.
+ if (MergePotentials.size()==1 &&
+ (MergePotentials.begin())->second != PredBB)
+ TII->InsertBranch(*((MergePotentials.begin())->second), I, NULL,
+ std::vector<MachineOperand>());
+ MergePotentials.clear();
+ }
+ }
+
+ return MadeChange;
+}
//===----------------------------------------------------------------------===//
// Branch Optimization
--- /dev/null
+; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | wc -l | grep 1
+; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | wc -l | grep 1
+; Check that calls to baz and quux are tail-merged.
+
+; ModuleID = 'tail.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @f(i32 %i, i32 %q) {
+entry:
+ %i_addr = alloca i32 ; <i32*> [#uses=2]
+ %q_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca i32, align 4 ; <i32*> [#uses=1]
+ "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %i, i32* %i_addr
+ store i32 %q, i32* %q_addr
+ %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
+ %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ br label %cond_next
+
+cond_false: ; preds = %entry
+ %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ br label %cond_next
+
+cond_next: ; preds = %cond_false, %cond_true
+ %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
+ %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
+ %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
+ br i1 %toBool10, label %cond_true11, label %cond_false15
+
+cond_true11: ; preds = %cond_next
+ %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_false15: ; preds = %cond_next
+ %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_next18: ; preds = %cond_false15, %cond_true11
+ %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %cond_next18
+ %retval20 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval20
+}
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
+
+declare i32 @foo(...)
+
+declare i32 @quux(...)