MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- AArch64CC::CondCode CC = AArch64CC::NE;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && isValueAvailable(CI)) {
// Try to optimize or fold the cmp.
// FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
// instruction.
- CC = getCompareCC(Predicate);
+ AArch64CC::CondCode CC = getCompareCC(Predicate);
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
switch (Predicate) {
default:
.addImm(CC)
.addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
- }
- } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
- MVT SrcVT;
- if (TI->hasOneUse() && isValueAvailable(TI) &&
- isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
- unsigned CondReg = getRegForValue(TI->getOperand(0));
- if (!CondReg)
- return false;
- bool CondIsKill = hasTrivialKill(TI->getOperand(0));
-
- // Issue an extract_subreg to get the lower 32-bits.
- if (SrcVT == MVT::i64) {
- CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
- AArch64::sub_32);
- CondIsKill = true;
- }
-
- unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
- assert(ANDReg && "Unexpected AND instruction emission failure.");
- emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
-
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB);
- CC = AArch64CC::EQ;
- }
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
-
finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else
FuncInfo.MBB->addSuccessorWithoutProb(Target);
return true;
- } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- unsigned CondReg = getRegForValue(BI->getCondition());
- if (!CondReg)
- return false;
+ } else {
+ AArch64CC::CondCode CC = AArch64CC::NE;
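+ // Try to fold an overflow intrinsic into the branch; on success, CC is
+ // updated to the condition code to branch on.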
+ if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (!CondReg)
+ return false;
- // Emit the branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
+ // Emit the branch.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
+ }
}
unsigned CondReg = getRegForValue(BI->getCondition());
if (!CondReg)
  return false;
bool CondRegIsKill = hasTrivialKill(BI->getCondition());
- // We've been divorced from our compare! Our block was split, and
- // now our compare lives in a predecessor block. We musn't
- // re-compare here, as the children of the compare aren't guaranteed
- // live across the block boundary (we *could* check for this).
- // Regardless, the compare has been done in the predecessor block,
- // and it left a value for us in a virtual register. Ergo, we test
- // the one-bit value left in the virtual register.
- //
- // FIXME: Optimize this with TBZW/TBZNW.
- unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondRegIsKill, 1);
- assert(ANDReg && "Unexpected AND instruction emission failure.");
- emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
-
+ // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
+ unsigned Opcode = AArch64::TBNZW;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
- CC = AArch64CC::EQ;
+ Opcode = AArch64::TBZW;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
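+ // Constrain the condition register to the register class expected by the
+ // tb(n)z instruction before building the branch.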
+ const MCInstrDesc &II = TII.get(Opcode);
+ unsigned ConstrainedCondReg
+ = constrainOperandRegClass(II, CondReg, II.getNumDefs());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
+ .addImm(0)
.addMBB(TBB);
finishCondBranch(BI->getParent(), TBB, FBB);
store i32 %c, i32* %c.addr, align 4
store i64 %d, i64* %d.addr, align 8
%0 = load i16, i16* %b.addr, align 2
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: b.eq LBB4_2
+; CHECK: tbz w0, #0, LBB4_2
%conv = trunc i16 %0 to i1
br i1 %conv, label %if.then, label %if.end
if.end: ; preds = %if.then, %entry
%1 = load i32, i32* %c.addr, align 4
-; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
-; CHECK: cmp w[[REG]], #0
-; CHECK: b.eq LBB4_4
+; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4
%conv1 = trunc i32 %1 to i1
br i1 %conv1, label %if.then3, label %if.end4
if.end4: ; preds = %if.then3, %if.end
%2 = load i64, i64* %d.addr, align 8
-; CHECK: cmp w{{[0-9]+}}, #0
-; CHECK: b.eq LBB4_6
+; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6
%conv5 = trunc i64 %2 to i1
br i1 %conv5, label %if.then7, label %if.end8
; CHECK: trunc64
; CHECK: and [[REG1:x[0-9]+]], x0, #0x1
; CHECK: mov x[[REG2:[0-9]+]], [[REG1]]
-; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1
-; CHECK: cmp [[REG3]], #0
-; CHECK: b.eq LBB5_2
+; CHECK: tbz w[[REG2]], #0, LBB5_2
%a = and i64 %foo, 1
%b = trunc i64 %a to i1
br i1 %b, label %if.then, label %if.else
; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: cset w9, eq
; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: and w8, w8, #0x1
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: b.ne
+; CHECK-NEXT: tbnz w8, #0,
define i64 @test_or_unpredictable(i32 %a, i32 %b) {
bb1:
%0 = icmp eq i32 %a, 0
; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: cset w9, ne
; CHECK-NEXT: and w8, w8, w9
-; CHECK-NEXT: and w8, w8, #0x1
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: b.eq
+; CHECK-NEXT: tbz w8, #0,
define i64 @test_and_unpredictable(i32 %a, i32 %b) {
bb1:
%0 = icmp ne i32 %a, 0