From 8a5832c92f51558b354a454785c92298959c3b37 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Tue, 15 Dec 2015 17:02:52 +0000
Subject: [PATCH] AMDGPU/SI: Add llvm.amdgcn.mbcnt.* intrinsics

Summary:
These are meant to be used instead of the llvm.SI.tid intrinsic, which will
be deprecated at some point.

Reviewers: arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D15475

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255652 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsAMDGPU.td      |  8 ++++++++
 lib/Target/AMDGPU/SIInstructions.td      |  4 ++--
 test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll | 24 ++++++++++++++++++++++++
 3 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll

diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 98af638a15b..84582e8b992 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -146,4 +146,12 @@ def int_amdgcn_interp_p2 :
             [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
             [IntrNoMem]>;  // See int_amdgcn_v_interp_p1 for why this is
                            // IntrNoMem.
+
+def int_amdgcn_mbcnt_lo :  // Selected to v_mbcnt_lo_u32_b32 (see SIInstructions.td).
+  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;  // (i32, i32) -> i32
+
+def int_amdgcn_mbcnt_hi :  // Selected to v_mbcnt_hi_u32_b32 (see SIInstructions.td).
+  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;  // (i32, i32) -> i32
 }
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index f247cbf41c9..08149d4eab2 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1590,10 +1590,10 @@ defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
   VOP_I32_I32_I32
 >;
 defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
-  VOP_I32_I32_I32
+  VOP_I32_I32_I32, int_amdgcn_mbcnt_lo  // ties llvm.amdgcn.mbcnt.lo to this instruction
 >;
 defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
-  VOP_I32_I32_I32
+  VOP_I32_I32_I32, int_amdgcn_mbcnt_hi  // ties llvm.amdgcn.mbcnt.hi to this instruction
 >;
 defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
   VOP_F32_F32_I32, AMDGPUldexp
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
new file mode 100644
index 00000000000..02ee2039542
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
@@ -0,0 +1,24 @@
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
+
+;GCN-LABEL: {{^}}mbcnt_intrinsics:
+;GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
+;SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
+;VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
+
+define void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+  %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1 ; constant operands -1 and 0
+  %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1 ; chains the lo result in as the second operand
+  %4 = bitcast i32 %hi to float ; llvm.SI.export is declared with float value operands
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4) ; consumes the result; presumably keeps the readnone calls from being optimized away
+  ret void
+}
+
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #1 = { nounwind readnone } ; used by both mbcnt declarations and their call sites
-- 
2.34.1