From ccb762ceea71b86cdf40af65d26ce86515037e58 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 10 Nov 2015 18:11:37 +0000
Subject: [PATCH] [AArch64] add overrides for isCheapToSpeculateCttz() and
 isCheapToSpeculateCtlz()

AArch64 has instructions for efficient count-leading/trailing-zeros, so this should be
considered a cheap operation (and therefore fair game for speculation) for any AArch64
implementation.

The net result of allowing this speculation for the regression tests in this
patch is that we get this code:

ctlz:
  clz  w0, w0
  ret

cttz:
  rbit  w8, w0
  clz  w0, w8
  ret

Instead of:

ctlz:
  cbz  w0, .LBB0_2
  clz  w0, w0
  ret
.LBB0_2:
  orr  w0, wzr, #0x20
  ret

cttz:
  cbz  w0, .LBB1_2
  rbit  w8, w0
  clz  w0, w8
  ret
.LBB1_2:
  orr  w0, wzr, #0x20
  ret

See D14469 for the larger motivation.

Differential Revision: http://reviews.llvm.org/D14505



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252625 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64ISelLowering.h      |  8 ++++
 .../SimplifyCFG/AArch64/cttz-ctlz.ll          | 43 +++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll

diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 091983387d9..c8d9ca3ef07 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -381,6 +381,14 @@ public:
     return AArch64::X1;
   }
 
+  bool isCheapToSpeculateCttz() const override {
+    return true;
+  }
+
+  bool isCheapToSpeculateCtlz() const override {
+    return true;
+  }
+
 private:
   bool isExtFreeImpl(const Instruction *Ext) const override;
 
diff --git a/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll
new file mode 100644
index 00000000000..e32d711143d
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -simplifycfg -mtriple=aarch64 < %s | FileCheck %s
+
+define i32 @ctlz(i32 %A) {
+; CHECK-LABEL: @ctlz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+define i32 @cttz(i32 %A) {
+; CHECK-LABEL: @cttz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
-- 
2.34.1