#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include <stdio.h>
using namespace llvm;
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
- RI(tm, *this)
+ RI(tm)
{ }
const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
return RI;
}
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo callbacks
+//===----------------------------------------------------------------------===//
+
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+
// If we are trying to copy to or from SCC, there is a bug somewhere else in
// the backend. While it may be theoretically possible to do this, it should
// never be necessary.
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
- if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0))
- .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc))
- .addReg(DestReg, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1))
- .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc));
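+ // Zero-terminated tables of sub-register indices, used below to expand a
+ // wide copy into a sequence of 32-bit moves.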
+ static const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ };
+
+ static const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ };
+
+ static const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ };
+
+ static const int16_t Sub0_2[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+ };
+
+ static const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1, 0
+ };
+
+ unsigned Opcode;
+ const int16_t *SubIndices;
+
+ if (AMDGPU::M0 == DestReg) {
+ // Scan backwards to check whether M0 already holds this value; if it
+ // does, the copy is redundant.
+ for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
+ I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
+
+ if (!I->definesRegister(AMDGPU::M0))
+ continue;
+
+ unsigned Opc = I->getOpcode();
+ if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
+ break;
+
+ if (!I->readsRegister(SrcReg))
+ break;
+
+ // The copy isn't necessary
+ return;
+ }
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_15;
+
} else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_1;
+
+ } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_2;
+
+ } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_15;
+
} else {
- assert(AMDGPU::SReg_32RegClass.contains(DestReg));
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ llvm_unreachable("Can't copy register!");
}
+
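+ // Expand the copy into one 32-bit move per sub-register. Every move except
+ // the last also implicitly defines the full destination register.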
+ while (unsigned SubIdx = *SubIndices++) {
+ MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+ get(Opcode), RI.getSubReg(DestReg, SubIdx));
+
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+
+ if (*SubIndices)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+}
+
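+// Return the opcode encoding the commuted form of Opcode, or Opcode itself
+// if the commuted form has no separate encoding.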
+unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+
+ int NewOpc;
+
+ // Try to map original to commuted opcode
+ if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+ return NewOpc;
+
+ // Try to map commuted to original opcode
+ if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+ return NewOpc;
+
+ return Opcode;
}
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
- if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
- !MI->getOperand(2).isReg())
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
return 0;
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
-}
+ // Cannot commute VOP2 if src0 is SGPR.
+ if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
+ return 0;
+
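+ // When src1 holds an immediate, TargetInstrInfo::commuteInstruction only
+ // swaps register operands, so exchange the register and immediate by hand.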
+ if (!MI->getOperand(2).isReg()) {
+ // XXX: Commute instructions with FPImm operands
+ if (NewMI || MI->getOperand(2).isFPImm() ||
+ (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
+ return 0;
+ }
+
+ // XXX: Commute VOP3 instructions with abs and neg set.
+ if (isVOP3(MI->getOpcode()) &&
+ (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::abs)).getImm() ||
+ MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::neg)).getImm()))
+ return 0;
-MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const {
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc());
- MachineInstrBuilder MIB(*MF, MI);
- MIB.addReg(DstReg, RegState::Define);
- MIB.addImm(Imm);
+ unsigned Reg = MI->getOperand(1).getReg();
+ MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
+ MI->getOperand(2).ChangeToRegister(Reg, false);
+ } else {
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
+
+ if (MI)
+ MI->setDesc(get(commuteOpcode(MI->getOpcode())));
return MI;
+}
+MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ unsigned SrcReg) const {
+ return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
+ DstReg).addReg(SrcReg);
}
bool SIInstrInfo::isMov(unsigned Opcode) const {
return Opcode == AMDGPU::S_MOV_B32 || Opcode == AMDGPU::S_MOV_B64 ||
Opcode == AMDGPU::V_MOV_B32_e32 || Opcode == AMDGPU::V_MOV_B32_e64;
}
-//===----------------------------------------------------------------------===//
-// Indirect addressing callbacks
-//===----------------------------------------------------------------------===//
+int SIInstrInfo::isMIMG(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MIMG;
+}
-unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const {
- assert(Channel == 0);
- return RegIndex;
+int SIInstrInfo::isSMRD(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}
+bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP1;
+}
-int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
+bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}
-int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
+bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
- unsigned SourceReg) const {
- llvm_unreachable("Unimplemented");
+bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
- llvm_unreachable("Unimplemented");
+bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
+}
+
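+// Inline constants can be encoded directly in the instruction: integers in
+// [-16, 64] and the small set of floating-point values listed below.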
+bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
+ if (MO.isImm()) {
+ return MO.getImm() >= -16 && MO.getImm() <= 64;
+ }
+ if (MO.isFPImm()) {
+ return MO.getFPImm()->isExactlyValue(0.0) ||
+ MO.getFPImm()->isExactlyValue(0.5) ||
+ MO.getFPImm()->isExactlyValue(-0.5) ||
+ MO.getFPImm()->isExactlyValue(1.0) ||
+ MO.getFPImm()->isExactlyValue(-1.0) ||
+ MO.getFPImm()->isExactlyValue(2.0) ||
+ MO.getFPImm()->isExactlyValue(-2.0) ||
+ MO.getFPImm()->isExactlyValue(4.0) ||
+ MO.getFPImm()->isExactlyValue(-4.0);
+ }
+ return false;
+}
+
+bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
+ return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
+}
+
+bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ uint16_t Opcode = MI->getOpcode();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ // Verify VOP*
+ if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
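+ // A VOP* instruction may use the constant bus at most once: one unique
+ // SGPR, M0, VCC, EXEC, or a literal constant.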
+ unsigned ConstantBusCount = 0;
+ unsigned SGPRUsed = AMDGPU::NoRegister;
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+
+ // EXEC register uses the constant bus.
+ if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
+ ++ConstantBusCount;
+
+ // SGPRs use the constant bus
+ if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
+ (!MO.isImplicit() &&
+ (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
+ if (SGPRUsed != MO.getReg()) {
+ ++ConstantBusCount;
+ SGPRUsed = MO.getReg();
+ }
+ }
+ }
+ // Literal constants use the constant bus.
+ if (isLiteralConstant(MO))
+ ++ConstantBusCount;
+ }
+ if (ConstantBusCount > 1) {
+ ErrInfo = "VOP* instruction uses the constant bus more than once";
+ return false;
+ }
+ }
+
+ // Verify SRC1 for VOP2 and VOPC
+ if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
+ const MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ if (Src1.isImm() || Src1.isFPImm()) {
+ ErrInfo = "VOP[2C] src1 cannot be an immediate.";
+ return false;
+ }
+ }
+
+ // Verify VOP3
+ if (isVOP3(Opcode)) {
+ if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
+ ErrInfo = "VOP3 src0 cannot be a literal constant.";
+ return false;
+ }
+ if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
+ ErrInfo = "VOP3 src1 cannot be a literal constant.";
+ return false;
+ }
+ if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
+ ErrInfo = "VOP3 src2 cannot be a literal constant.";
+ return false;
+ }
+ }
+ return true;
+}
+
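+// Return the VALU opcode that corresponds to a SALU opcode, or
+// INSTRUCTION_LIST_END if the instruction has no VALU equivalent.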
+unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default: return AMDGPU::INSTRUCTION_LIST_END;
+ case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
+ case AMDGPU::COPY: return AMDGPU::COPY;
+ case AMDGPU::PHI: return AMDGPU::PHI;
+ case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
+ case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
+ case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
+ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
+ case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
+ case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ }
+}
+
+bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
+ return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
+}
+
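+// Return the register class an operand is constrained to: the class named
+// by the instruction descriptor when one exists, otherwise the class of the
+// operand's virtual register.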
+const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &Desc = get(MI.getOpcode());
+ if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
+ Desc.OpInfo[OpNo].RegClass == -1)
+ return MRI.getRegClass(MI.getOperand(OpNo).getReg());
+
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return RI.getRegClass(RCID);
+}
+
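+// COPY and REG_SEQUENCE impose no register class on their sources, so
+// whether they can read a VGPR is determined by their destination's class.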
+bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::REG_SEQUENCE:
+ return RI.hasVGPRs(getOpRegClass(MI, 0));
+ default:
+ return RI.hasVGPRs(getOpRegClass(MI, OpNo));
+ }
+}
+
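+// Legalize an operand by moving its value into a fresh virtual register of
+// the class the instruction descriptor requires: COPY for register operands,
+// S_MOV_B32 or V_MOV_B32 for immediates.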
+void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
+ const TargetRegisterClass *RC = RI.getRegClass(RCID);
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (MO.isReg()) {
+ Opcode = AMDGPU::COPY;
+ } else if (RI.isSGPRClass(RC)) {
+ Opcode = AMDGPU::S_MOV_B32;
+ }
+
+ unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID));
+ BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
+ Reg).addOperand(MO);
+ MO.ChangeToRegister(Reg, false);
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src2);
+
+ // Legalize VOP2
+ if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
+ // be the first operand, and there can only be one.
+ if (Src1.isImm() || Src1.isFPImm() ||
+ (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ return;
+ }
+ legalizeOpWithMove(MI, Src1Idx);
+ }
+ }
+
+ // Legalize VOP3
+ if (isVOP3(MI->getOpcode())) {
+ int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
+ unsigned SGPRReg = AMDGPU::NoRegister;
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ continue;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ if (MO.isReg()) {
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
+ }
+ } else if (!isLiteralConstant(MO)) {
+ // If it is not a register and not a literal constant, then it must be
+ // an inline constant which is always legal.
+ continue;
+ }
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+ }
+
+ // Legalize REG_SEQUENCE
+ // The register class of the operands must match the register class of the
+ // output.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ const TargetRegisterClass *OpRC =
+ MRI.getRegClass(MI->getOperand(i).getReg());
+ if (RI.hasVGPRs(OpRC)) {
+ VRC = OpRC;
+ } else {
+ SRC = OpRC;
+ }
+ }
+
+ // If any of the operands are VGPRs, then they all must be VGPRs; otherwise
+ // we will create illegal VGPR->SGPR copies when legalizing them.
+ if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
+ if (!VRC) {
+ assert(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
+ }
+ RC = VRC;
+ } else {
+ RC = SRC;
+ }
+
+ // Update all the operands so they have the same register class.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ unsigned DstReg = MRI.createVirtualRegister(RC);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(AMDGPU::COPY), DstReg)
+ .addOperand(MI->getOperand(i));
+ MI->getOperand(i).setReg(DstReg);
+ }
+ }
+}
+
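+// Rewrite a SALU instruction (and, transitively, any users that cannot read
+// VGPRs) into its VALU equivalent, moving the results into VGPRs.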
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+ SmallVector<MachineInstr *, 128> Worklist;
+ Worklist.push_back(&TopInst);
+
+ while (!Worklist.empty()) {
+ MachineInstr *Inst = Worklist.pop_back_val();
+ unsigned NewOpcode = getVALUOp(*Inst);
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ continue;
+
+ MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
+
+ // Use the new VALU Opcode.
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Inst->setDesc(NewDesc);
+
+ // Add the implicit register uses and definitions of the new opcode.
+ if (NewDesc.ImplicitUses) {
+ for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
+ Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitUses[i],
+ false, true));
+ }
+ }
+
+ if (NewDesc.ImplicitDefs) {
+ for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
+ Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitDefs[i],
+ true, true));
+ }
+ }
+
+ legalizeOperands(Inst);
+
+ // Update the destination register class.
+ const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
+
+ switch (Inst->getOpcode()) {
+ // For target instructions, getOpRegClass just returns the virtual
+ // register class associated with the operand, so we need to find an
+ // equivalent VGPR register class in order to move the instruction to the
+ // VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ if (RI.hasVGPRs(NewDstRC))
+ continue;
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ unsigned DstReg = Inst->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I;
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::VReg_32RegClass;
}
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
+ .addReg(IndirectBaseReg, RegState::Define)
+ .addOperand(I->getOperand(0))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0)
+ .addReg(ValueReg);
}
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+ .addOperand(I->getOperand(0))
+ .addOperand(I->getOperand(1))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0);
}
-const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
- llvm_unreachable("Unimplemented");
+void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const {
+ int End = getIndirectIndexEnd(MF);
+ int Begin = getIndirectIndexBegin(MF);
+
+ if (End == -1)
+ return;
+
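+ // Registers from wider classes overlap the reserved window when they start
+ // up to (width - 1) 32-bit slots before Begin, so reserve those as well.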
+ for (int Index = Begin; Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}