Summary:
Enabling this feature will account for the two SGPRs used by the hardware
to store the XNACK_MASK physically.
The hardware only requires this reservation when the XNACK feature is
explicitly enabled. At some point, HSA will probably want to do that, but
it does increase SGPR register pressure, so leave it disabled by default
for now (but do add a small test).
Reviewers: arsenm, tstellarAMD
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15869
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256794
91177308-0d34-0410-b5e6-
96231b3b80d8
"true",
"Support flat address space">;
+def FeatureXNACK : SubtargetFeature<"xnack",
+ "EnableXNACK",
+ "true",
+ "Enable XNACK support">;
+
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"EnableVGPRSpilling",
"true",
}
}
- if (VCCUsed || FlatUsed)
+ if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) {
MaxSGPR += 2;
- if (FlatUsed) {
- MaxSGPR += 2;
- // 2 additional for VI+.
- if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (FlatUsed)
+ MaxSGPR += 2;
+
+ if (STM.isXNACKEnabled())
MaxSGPR += 2;
}
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ if (STM.isXNACKEnabled())
+ header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
+ EnableXNACK(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
+ bool EnableXNACK;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
}
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
+ bool isXNACKEnabled() const {
+ return EnableXNACK;
+ }
+
unsigned getMaxWavesPerCU() const {
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return 10;
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
+ if (ST.isXNACKEnabled())
+ BaseIdx -= 4;
+
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
- // next sgpr128 down.
+ // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and
+ // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down
+ // either way.
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
}
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
- unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ unsigned Idx;
+
+ if (!ST.isXNACKEnabled())
+ Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ else
+ Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
+
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // Next register before reservations for flat_scr and vcc.
- return AMDGPU::SGPR97;
+ if (!ST.isXNACKEnabled()) {
+ // Next register before reservations for flat_scr and vcc.
+ return AMDGPU::SGPR97;
+ } else {
+ // Next register before reservations for flat_scr, xnack_mask, vcc,
+ // and scratch resource.
+ return AMDGPU::SGPR91;
+ }
}
return AMDGPU::SGPR95;
// for VCC/FLAT_SCR.
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
+
+ if (ST.isXNACKEnabled())
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
}
// Tonga and Iceland can only allocate a fixed number of SGPRs due
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
- // Assume XNACK_MASK is unused.
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+ if (ST.isXNACKEnabled())
+ Limit -= 2;
+
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
-; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI
-; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI
+; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI --check-prefix=NO-XNACK
+; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=NO-XNACK
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=XNACK
; GCN-LABEL: {{^}}no_vcc_no_flat:
-; GCN: ; NumSgprs: 8
+; NO-XNACK: ; NumSgprs: 8
+; XNACK: ; NumSgprs: 12
define void @no_vcc_no_flat() {
entry:
call void asm sideeffect "", "~{SGPR7}"()
}
; GCN-LABEL: {{^}}vcc_no_flat:
-; GCN: ; NumSgprs: 10
+; NO-XNACK: ; NumSgprs: 10
+; XNACK: ; NumSgprs: 12
define void @vcc_no_flat() {
entry:
call void asm sideeffect "", "~{SGPR7},~{VCC}"()
; GCN-LABEL: {{^}}no_vcc_flat:
; CI: ; NumSgprs: 12
-; VI: ; NumSgprs: 14
+; VI: ; NumSgprs: 12
+; XNACK: ; NumSgprs: 14
define void @no_vcc_flat() {
entry:
call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"()
; GCN-LABEL: {{^}}vcc_flat:
; CI: ; NumSgprs: 12
-; VI: ; NumSgprs: 14
+; VI: ; NumSgprs: 12
+; XNACK: ; NumSgprs: 14
define void @vcc_flat() {
entry:
call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()