From 23a903a5175908226b6aecc1832d32e820b3f091 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 3 Dec 2015 17:19:58 +0000 Subject: [PATCH] AArch64FastISel: Use tbz/tbnz to branch on i1 In the case of a conditional branch without a preceding cmp we used to emit an "and; cmp; b.eq/b.ne" sequence; use tbz/tbnz instead. Differential Revision: http://reviews.llvm.org/D15122 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254621 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 86 ++++++------------- test/CodeGen/AArch64/arm64-fast-isel-br.ll | 15 +--- .../AArch64/fast-isel-branch-cond-mask.ll | 3 +- .../AArch64/fast-isel-branch-cond-split.ll | 8 +- 4 files changed, 32 insertions(+), 80 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index b7849d5bbc2..cae2d527629 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2275,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && isValueAvailable(CI)) { // Try to optimize or fold the cmp. @@ -2307,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch // instruction. 
- CC = getCompareCC(Predicate); + AArch64CC::CondCode CC = getCompareCC(Predicate); AArch64CC::CondCode ExtraCC = AArch64CC::AL; switch (Predicate) { default: @@ -2335,37 +2334,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; - } - } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && isValueAvailable(TI) && - isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(TI->getOperand(0)); - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) { - CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, - AArch64::sub_32); - CondIsKill = true; - } - - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = AArch64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2383,20 +2351,23 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { } else FuncInfo.MBB->addSuccessorWithoutProb(Target); return true; - } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned CondReg = getRegForValue(BI->getCondition()); - if (!CondReg) - return false; + } else { + AArch64CC::CondCode CC = AArch64CC::NE; + if (foldXALUIntrinsic(CC, I, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. 
+ unsigned CondReg = getRegForValue(BI->getCondition()); + if (!CondReg) + return false; - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } unsigned CondReg = getRegForValue(BI->getCondition()); @@ -2404,26 +2375,19 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return false; bool CondRegIsKill = hasTrivialKill(BI->getCondition()); - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - // - // FIXME: Optimize this with TBZW/TBZNW. - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondRegIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - + // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
+ unsigned Opcode = AArch64::TBNZW; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = AArch64CC::EQ; + Opcode = AArch64::TBZW; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) + const MCInstrDesc &II = TII.get(Opcode); + unsigned ConstrainedCondReg + = constrainOperandRegClass(II, CondReg, II.getNumDefs()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) + .addImm(0) .addMBB(TBB); finishCondBranch(BI->getParent(), TBB, FBB); diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 0ef7b143df8..55c9c6036ed 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,9 +94,7 @@ entry: store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: b.eq LBB4_2 +; CHECK: tbz w0, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end @@ -106,9 +104,7 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry %1 = load i32, i32* %c.addr, align 4 -; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1 -; CHECK: cmp w[[REG]], #0 -; CHECK: b.eq LBB4_4 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4 %conv1 = trunc i32 %1 to i1 br i1 %conv1, label %if.then3, label %if.end4 @@ -118,8 +114,7 @@ if.then3: ; preds = %if.end if.end4: ; preds = %if.then3, %if.end %2 = load i64, i64* %d.addr, align 8 -; CHECK: cmp w{{[0-9]+}}, #0 -; CHECK: b.eq LBB4_6 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6 %conv5 = trunc i64 %2 to i1 br i1 %conv5, label %if.then7, label %if.end8 @@ -139,9 +134,7 @@ define i32 @trunc64(i64 %foo) nounwind { ; CHECK: trunc64 ; CHECK: and [[REG1:x[0-9]+]], x0, #0x1 ; CHECK: mov x[[REG2:[0-9]+]], [[REG1]] -; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1 -; CHECK: cmp [[REG3]], #0 -; CHECK: b.eq LBB5_2 +; CHECK: tbz w[[REG2]], 
#0, LBB5_2 %a = and i64 %foo, 1 %b = trunc i64 %a to i1 br i1 %b, label %if.then, label %if.else diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll index c018b2778b0..55fbf63319e 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll @@ -4,8 +4,7 @@ define void @test(i64 %a, i64 %b, i2* %c) { ; CHECK-LABEL: test ; CHECK: and [[REG1:w[0-9]+]], w8, #0x3 ; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}} -; CHECK: and [[REG2:w[0-9]+]], w8, #0x1 -; CHECK-NEXT: cmp [[REG2]], #0 +; CHECK-NEXT: tbz w9, #0, %1 = trunc i64 %a to i2 %2 = trunc i64 %b to i1 ; Force fast-isel to fall back to SDAG. diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll index 5248b9253e7..e04a62b85c8 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -44,9 +44,7 @@ bb4: ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, eq ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.ne +; CHECK-NEXT: tbnz w8, #0, define i64 @test_or_unpredictable(i32 %a, i32 %b) { bb1: %0 = icmp eq i32 %a, 0 @@ -68,9 +66,7 @@ bb4: ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, ne ; CHECK-NEXT: and w8, w8, w9 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.eq +; CHECK-NEXT: tbz w8, #0, define i64 @test_and_unpredictable(i32 %a, i32 %b) { bb1: %0 = icmp ne i32 %a, 0 -- 2.34.1