#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
}
/// Construct the printer on top of the generic AsmPrinter and cache the
/// subtarget's dumpCode() setting in DisasmEnabled, which
/// runOnMachineFunction() later checks before writing the .AMDGPU.disasm
/// section.
+AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
+}
+
/// We need to override this function so we can avoid
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
///
/// Emission order, as written below:
///   1. a raw "@<name>:" comment,
///   2. the program-info words into the ".AMDGPU.config" section
///      (SI path for generations after NORTHERN_ISLANDS, R600 path otherwise),
///   3. the function body into the text section,
///   4. when verbose, register/stack statistics as raw comments in
///      ".AMDGPU.csdata",
///   5. when the subtarget requests a code dump, the collected disassembly
///      text into ".AMDGPU.disasm".
///
/// \param MF the machine function to print.
/// \returns false on every path visible here.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
- if (STM.dumpCode()) {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- MF.dump();
-#endif
- }
SetupMachineFunction(MF);
- if (OutStreamer.hasRawTextSupport()) {
- OutStreamer.EmitRawText("@" + MF.getName() + ":");
- }
- const MCSectionELF *ConfigSection = getObjFileLowering().getContext()
- .getELFSection(".AMDGPU.config",
// The function label is now emitted as a raw comment rather than raw text.
+ OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
+
+ MCContext &Context = getObjFileLowering().getContext();
+ const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
ELF::SHT_PROGBITS, 0,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(ConfigSection);
- if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- EmitProgramInfoSI(MF);
+
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
// KernelInfo is only filled in on the SI path; the verbose block further
// down reads it only under the same generation check.
+ SIProgramInfo KernelInfo;
+ if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR);
+ EmitProgramInfoSI(MF, KernelInfo);
} else {
EmitProgramInfoR600(MF);
}
+
// Reset the per-function disassembly buffers before the body is emitted.
// NOTE(review): presumably these are refilled while EmitFunctionBody() runs;
// the code that appends to them is not visible in this chunk.
+ DisasmLines.clear();
+ HexLines.clear();
+ DisasmLineMaxLen = 0;
+
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
+
// Verbose output: statistics are written as raw comments after switching to
// a dedicated ".AMDGPU.csdata" section.
+ if (isVerbose()) {
+ const MCSectionELF *CommentSection
+ = Context.getELFSection(".AMDGPU.csdata",
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(CommentSection);
+
+ if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ OutStreamer.emitRawComment(" Kernel info:", false);
+ OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
+ false);
+ OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
+ false);
+ } else {
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ OutStreamer.emitRawComment(
+ Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
+ }
+ }
+
+ if (STM.dumpCode()) {
// MF.dump() is compiled in only for asserts builds or LLVM_ENABLE_DUMP.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ MF.dump();
+#endif
+
// DisasmEnabled was initialised from dumpCode() in the constructor.
+ if (DisasmEnabled) {
+ OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
+ ELF::SHT_NOTE, 0,
+ SectionKind::getReadOnly()));
+
// Each record is the disassembly text, space padding up to
// DisasmLineMaxLen, then " ; <hex>\n".
// NOTE(review): assumes DisasmLineMaxLen >= every DisasmLines[i].size() and
// that HexLines has at least as many entries as DisasmLines — confirm where
// these are populated.
+ for (size_t i = 0; i < DisasmLines.size(); ++i) {
+ std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
+ Comment += " ; " + HexLines[i] + "\n";
+
+ OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
+ OutStreamer.EmitBytes(StringRef(Comment));
+ }
+ }
+ }
+
return false;
}
/// Emit the R600-family program info for \p MF to the current section as a
/// sequence of 4-byte (register, value) pairs:
///   - the shader-type specific SQ_PGM_RESOURCES_* register with the GPR
///     count and stack size packed into one word,
///   - DB_SHADER_CONTROL with the kill-enable field,
///   - for compute shaders only, SQ_LDS_ALLOC with the LDS size.
///
/// \param MF the machine function whose instructions are scanned.
void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
unsigned MaxGPR = 0;
+ bool killPixel = false;
const R600RegisterInfo * RI =
static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
// Walk every instruction of every basic block.
// NOTE(review): part of this loop nest is elided in this view; presumably
// MaxGPR is updated inside the operand loop — not visible here.
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
// Any KILLGT instruction turns on the kill-enable field emitted below.
+ if (MI.getOpcode() == AMDGPU::KILLGT)
+ killPixel = true;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
MachineOperand & MO = MI.getOperand(op_idx);
}
}
}
- OutStreamer.EmitIntValue(MaxGPR + 1, 4);
- OutStreamer.EmitIntValue(MFI->StackSize, 4);
+
// Pick the resource register for this shader type; the register set differs
// between Evergreen-and-later and the older R600/R700 parts. Unknown shader
// types take the first case label after `default` in each switch.
+ unsigned RsrcReg;
+ if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
+ // Evergreen / Northern Islands
+ switch (MFI->ShaderType) {
+ default: // Fall through
+ case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
+ case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
+ case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
+ case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
+ }
+ } else {
+ // R600 / R700
+ switch (MFI->ShaderType) {
+ default: // Fall through
+ case ShaderType::GEOMETRY: // Fall through
+ case ShaderType::COMPUTE: // Fall through
+ case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
+ case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
+ }
+ }
+
// Register/value pairs, each written as a 4-byte integer.
+ OutStreamer.EmitIntValue(RsrcReg, 4);
+ OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+ S_STACK_SIZE(MFI->StackSize), 4);
+ OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
+ OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+
// LDS size is rounded up to a multiple of 4 and then divided by 4.
// NOTE(review): presumably LDSSize is in bytes and the register takes
// dwords — confirm against the register definition.
+ if (MFI->ShaderType == ShaderType::COMPUTE) {
+ OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
+ OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
+ }
}
-void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
/// Scan the register operands of \p MF and report SGPR/VGPR usage.
///
/// For each register operand the highest hardware index it covers is computed
/// as (encoding & 0xff) + width - 1 and folded into a running maximum. VCC is
/// tracked separately and adds 2 to the SGPR result; SCC, EXEC and M0 are
/// ignored.
///
/// \param MF            function to scan.
/// \param[out] NumSGPR  receives the SGPR maximum (plus 2 if VCC is used).
/// \param[out] NumVGPR  receives the VGPR maximum.
///
/// NOTE(review): the outputs are assigned from the running maxima as-is (a
/// highest covered index, not index + 1) — confirm this is what consumers of
/// NumSGPR/NumVGPR expect.
+void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
+ unsigned &NumSGPR,
+ unsigned &NumVGPR) const {
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- MachineOperand & MO = MI.getOperand(op_idx);
- unsigned maxUsed;
+ MachineOperand &MO = MI.getOperand(op_idx);
unsigned width = 0;
bool isSGPR = false;
- unsigned reg;
- unsigned hwReg;
+
// Only register operands contribute to the counts.
if (!MO.isReg()) {
continue;
}
- reg = MO.getReg();
+ unsigned reg = MO.getReg();
// VCC is not folded into the maximum; it is added after the scan.
if (reg == AMDGPU::VCC) {
VCCUsed = true;
continue;
}
+
// These registers are skipped entirely.
switch (reg) {
default: break;
+ case AMDGPU::SCC:
case AMDGPU::EXEC:
case AMDGPU::M0:
continue;
} else if (AMDGPU::VReg_256RegClass.contains(reg)) {
isSGPR = false;
width = 8;
+ } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(reg)) {
isSGPR = false;
width = 16;
} else {
- assert(!"Unknown register class");
+ llvm_unreachable("Unknown register class");
}
// Low 8 bits of the encoding give the first hardware register index; the
// operand covers `width` consecutive registers starting there.
- hwReg = RI->getEncodingValue(reg) & 0xff;
- maxUsed = hwReg + width - 1;
+ unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
+ unsigned maxUsed = hwReg + width - 1;
if (isSGPR) {
MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
} else {
}
}
}
- if (VCCUsed) {
+
// Account for VCC by bumping the SGPR maximum by two.
+ if (VCCUsed)
MaxSGPR += 2;
- }
- SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ NumSGPR = MaxSGPR;
+ NumVGPR = MaxVGPR;
+}
+
/// Fill \p Out with the register usage of \p MF by delegating to
/// findNumUsedRegistersSI(); only Out.NumSGPR and Out.NumVGPR are written.
///
/// \param[out] Out program info record to populate.
/// \param MF       function to analyse.
+void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out,
+ MachineFunction &MF) const {
+ findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR);
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
+ const SIProgramInfo &KernelInfo) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg;
switch (MFI->ShaderType) {
default: // Fall through
}
OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
+ OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
+ S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
+
+ unsigned LDSAlignShift;
+ if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ // LDS is allocated in 64 dword blocks
+ LDSAlignShift = 8;
+ } else {
+ // LDS is allocated in 128 dword blocks
+ LDSAlignShift = 9;
+ }
+ unsigned LDSBlocks =
+ RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+
+ if (MFI->ShaderType == ShaderType::COMPUTE) {
+ OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
+ OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
+ }
if (MFI->ShaderType == ShaderType::PIXEL) {
+ OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
+ OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
}