Summary:
This returns a pointer to the dispatch packet, which can be used to load
information about the kernel dispatch.
Reviewers: arsenm
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D14898
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254116 91177308-0d34-0410-b5e6-96231b3b80d8
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
Intrinsic<[], [], []>;
+// Returns a pointer (i8, address space 2) to the dispatch packet, which can be
+// used to load information about the kernel dispatch. Takes no arguments and is
+// marked IntrNoMem. The builtin name mirrors the intrinsic name
+// (llvm.amdgcn.dispatch.ptr).
+def int_amdgcn_dispatch_ptr :
+ GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
}
{ "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" },
{ "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" },
- { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }
+ { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" },
+ { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }
};
// TODO: Intrinsics that require queue ptr.
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
AMD_CODE_PROPERTY_IS_PTR64;
+ if (MFI->hasDispatchPtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
CCInfo.AllocateReg(ScratchPtrRegHi);
MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ if (Subtarget->isAmdHsaOS() && MFI->hasDispatchPtr()) {
+ unsigned DispatchPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR);
+ unsigned DispatchPtrRegLo =
+ TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 0);
+ unsigned DispatchPtrRegHi =
+ TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 1);
+ CCInfo.AllocateReg(DispatchPtrRegLo);
+ CCInfo.AllocateReg(DispatchPtrRegHi);
+ MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
+ }
}
if (Info->getShaderType() == ShaderType::COMPUTE) {
// TODO: Should this propagate fast-math-flags?
switch (IntrinsicID) {
+ case Intrinsic::amdgcn_dispatch_ptr:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
+
case Intrinsic::r600_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, false);
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
switch (Value) {
case SIRegisterInfo::TGID_X:
case SIRegisterInfo::SCRATCH_PTR:
return AMDGPU::SGPR2_SGPR3;
case SIRegisterInfo::INPUT_PTR:
+ if (ST.isAmdHsaOS())
+ return MFI->hasDispatchPtr() ? AMDGPU::SGPR2_SGPR3 : AMDGPU::SGPR0_SGPR1;
+ return AMDGPU::SGPR0_SGPR1;
+ case SIRegisterInfo::DISPATCH_PTR:
+ assert(MFI->hasDispatchPtr());
return AMDGPU::SGPR0_SGPR1;
case SIRegisterInfo::TIDIG_X:
return AMDGPU::VGPR0;
enum PreloadedValue {
// SGPRS:
SCRATCH_PTR = 0,
+ DISPATCH_PTR = 1,
INPUT_PTR = 3,
TGID_X = 10,
TGID_Y = 11,
--- /dev/null
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Kernel that calls llvm.amdgcn.dispatch.ptr, loads an i32 from offset 0 of the
+; returned pointer and stores it to %out. The checks below expect the dispatch
+; pointer SGPR input to be enabled in the kernel header and the scalar load to
+; use s[0:1] as its base address.
+; GCN-LABEL: {{^}}test:
+; GCN: enable_sgpr_dispatch_ptr = 1
+; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
+define void @test(i32 addrspace(1)* %out) {
+ %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+ %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
+ %value = load i32, i32 addrspace(2)* %header_ptr
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+attributes #0 = { readnone }