//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}
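
// The constructor records which ABI inputs (user and system SGPRs) this
// function needs: compute kernels always take the kernarg segment pointer,
// the "amdgpu-work-group-id-*" / "amdgpu-work-item-id-*" attributes request
// the corresponding ID registers, and stack objects or enabled VGPR spilling
// force the private segment (scratch) setup.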
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(true),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(true),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  const Function *F = MF.getFunction();

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  if (getShaderType() == ShaderType::COMPUTE)
    KernargSegmentPtr = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(this);
  bool HasStackObjects = FrameInfo->hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;
}
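
// The add* helpers reserve the next user SGPR(s) for a preloaded ABI input
// and return the first register of the group. The private segment buffer is
// a 128-bit resource descriptor (four SGPRs); the pointer inputs each take a
// 64-bit SGPR pair.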
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}
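
// SGPRs are spilled to lanes of a VGPR. Each VGPR holds 64 lanes of 4 bytes,
// so a frame-object byte offset maps to a lane VGPR index (Offset / 256) and
// a lane number within that VGPR ((Offset / 4) % 64).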
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
                                                       MachineFunction *MF,
                                                       unsigned FrameIndex,
                                                       unsigned SubIdx) {
  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  struct SpilledReg Spill;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister) {
      LLVMContext &Ctx = MF->getFunction()->getContext();
      Ctx.emitError("Ran out of VGPRs for spilling SGPR");

      // When compiling from inside Mesa, the compilation continues.
      // Select an arbitrary register to avoid triggering assertions
      // during subsequent passes.
      LaneVGPR = AMDGPU::VGPR0;
    }

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid machine verifier
    // complaints about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  Spill.Lane = Lane;
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
                                              const MachineFunction &MF) const {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  // FIXME: We should get this information from kernel attributes if it is
  // available.
  return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
}