AMDGPU: Add llvm.amdgcn.dispatch.ptr intrinsic
author Tom Stellard <thomas.stellard@amd.com>
Thu, 26 Nov 2015 00:43:29 +0000 (00:43 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Thu, 26 Nov 2015 00:43:29 +0000 (00:43 +0000)
Summary:
This returns a pointer to the dispatch packet, which can be used to load
information about the kernel dispatch.

Reviewers: arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D14898

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254116 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsAMDGPU.td
lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SIRegisterInfo.cpp
lib/Target/AMDGPU/SIRegisterInfo.h
test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll [new file with mode: 0644]

index cad2b56a35f153bc1765d857ab265fe51fe1c7b9..0f87596d473170a8dc26904bef60a5ce598a44ca 100644 (file)
@@ -127,4 +127,8 @@ def int_amdgcn_s_dcache_wb_vol :
   GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
   Intrinsic<[], [], []>;
 
+def int_amdgcn_dispatch_ptr :  // Returns an i8 pointer (address space 2) to the dispatch packet.
+  GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
 }
index e57415f3b8e14b8f7957916421427086f8a4b85b..378183927242c59161099b4bdb6d18521c4398f1 100644 (file)
@@ -105,7 +105,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
 
     { "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" },
     { "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" },
-    { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }
+    { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" },
+    { "llvm.amdgcn.dispatch.ptr",     "amdgpu-dispatch-ptr" }
   };
 
   // TODO: Intrinsics that require queue ptr.
index 39beb6a4f503b87f9da1b9a25f3b985d9459ccba..314ef721c1fcd395cb391f2a47205ced4e809aa2 100644 (file)
@@ -528,6 +528,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
       AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
       AMD_CODE_PROPERTY_IS_PTR64;
 
+  if (MFI->hasDispatchPtr())
+    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+
   header.kernarg_segment_byte_size = MFI->ABIArgOffset;
   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
index 4ed9cf6c97eb395920af5b50e03336486071ad1f..5c67bf80c17561180e83cb4ee5b5057943656d32 100644 (file)
@@ -646,6 +646,18 @@ SDValue SITargetLowering::LowerFormalArguments(
     CCInfo.AllocateReg(ScratchPtrRegHi);
     MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
     MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    if (Subtarget->isAmdHsaOS() && MFI->hasDispatchPtr()) {
+      unsigned DispatchPtrReg =
+        TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR);
+      unsigned DispatchPtrRegLo =
+        TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 0);
+      unsigned DispatchPtrRegHi =
+        TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 1);
+      CCInfo.AllocateReg(DispatchPtrRegLo);
+      CCInfo.AllocateReg(DispatchPtrRegHi);
+      MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
+    }
   }
 
   if (Info->getShaderType() == ShaderType::COMPUTE) {
@@ -1053,6 +1065,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   // TODO: Should this propagate fast-math-flags?
 
   switch (IntrinsicID) {
+  case Intrinsic::amdgcn_dispatch_ptr:
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
+      TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
+
   case Intrinsic::r600_read_ngroups_x:
     return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
                           SI::KernelInputOffsets::NGROUPS_X, false);
index 436808b5287d0085b789e003235cb5297073bd13..ab7539b6fb3a22b8b0f41603035526e98ef875d3 100644 (file)
@@ -510,6 +510,7 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                            enum PreloadedValue Value) const {
 
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   switch (Value) {
   case SIRegisterInfo::TGID_X:
@@ -525,6 +526,11 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
   case SIRegisterInfo::SCRATCH_PTR:
     return AMDGPU::SGPR2_SGPR3;
   case SIRegisterInfo::INPUT_PTR:
+    if (ST.isAmdHsaOS())
+      return MFI->hasDispatchPtr() ? AMDGPU::SGPR2_SGPR3 : AMDGPU::SGPR0_SGPR1;
+    return AMDGPU::SGPR0_SGPR1;
+  case SIRegisterInfo::DISPATCH_PTR:
+    assert(MFI->hasDispatchPtr());
     return AMDGPU::SGPR0_SGPR1;
   case SIRegisterInfo::TIDIG_X:
     return AMDGPU::VGPR0;
index b1389533ec39f2c4d568412e118ccfd0eb53e35c..36f6d1c7a261bf3db1a8b216f29319376bf4dc68 100644 (file)
@@ -99,6 +99,7 @@ public:
   enum PreloadedValue {
     // SGPRS:
     SCRATCH_PTR         =  0,
+    DISPATCH_PTR        =  1,
     INPUT_PTR           =  3,
     TGID_X              = 10,
     TGID_Y              = 11,
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
new file mode 100644 (file)
index 0000000..719f7ff
--- /dev/null
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test:
+; GCN: enable_sgpr_dispatch_ptr = 1
+; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
+define void @test(i32 addrspace(1)* %out) {
+  %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+  %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
+  %value = load i32, i32 addrspace(2)* %header_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+attributes #0 = { readnone }