def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
let Asm64 = "$dst, $src0, $src1, $src2";
}
>;
} // End Uses = [SCC]
+let Uses = [VCC] in {
def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ 0x00000006, (ins sopp_brtarget:$simm16),
"s_cbranch_vccz $simm16"
>;
def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ 0x00000007, (ins sopp_brtarget:$simm16),
"s_cbranch_vccnz $simm16"
>;
+} // End Uses = [VCC]
let Uses = [EXEC] in {
def S_CBRANCH_EXECZ : SOPP <
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
- (add SGPR_64, VCCReg, EXEC, FLAT_SCR)
+ (add SGPR_64, VCC, EXEC, FLAT_SCR)
>;
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
}
+// Copy MachineOperand with all flags except setting it as implicit.
+static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) {
+ assert(!Orig.isImplicit());
+ return MachineOperand::CreateReg(Orig.getReg(),
+ Orig.isDef(),
+ true,
+ Orig.isKill(),
+ Orig.isDead(),
+ Orig.isUndef(),
+ Orig.isEarlyClobber(),
+ Orig.getSubReg(),
+ Orig.isDebug(),
+ Orig.isInternalRead());
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
if (TII->isVOPC(Op32)) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // VOPC instructions can only write to the VCC register. We can't
- // force them to use VCC here, because the register allocator has
- // trouble with sequences like this, which cause the allocator to run
- // out of registers if vreg0 and vreg1 belong to the VCCReg register
- // class:
- // vreg0 = VOPC;
- // vreg1 = VOPC;
- // S_AND_B64 vreg0, vreg1
+ // VOPC instructions can only write to the VCC register. We can't
+ // force them to use VCC here, because this is only one register and
+ // cannot deal with sequences which would require multiple copies of
+ // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
//
// So, instead of forcing the instruction to write to VCC, we provide
// a hint to the register allocator to use VCC and then we we will run
Inst32.addOperand(*Src1);
const MachineOperand *Src2 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- if (Src2)
- Inst32.addOperand(*Src2);
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2) {
+ int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+ if (Op32Src2Idx != -1) {
+ Inst32.addOperand(*Src2);
+ } else {
+ // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+ // replaced with an implicit read of vcc.
+ assert(Src2->getReg() == AMDGPU::VCC &&
+ "Unexpected missing register operand");
+ Inst32.addOperand(copyRegOperandAsImplicit(*Src2));
+ }
+ }
++NumInstructionsShrunk;
MI.eraseFromParent();