From: Matt Arsenault Date: Sat, 8 Aug 2015 00:41:45 +0000 (+0000) Subject: AMDGPU/SI: Remove source uses of VCCReg X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f456aa53d974f063d7a74e5164e567421fb6031c;p=oota-llvm.git AMDGPU/SI: Remove source uses of VCCReg git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244379 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index f2558a68957..6112b73a462 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -1136,7 +1136,7 @@ def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> { - let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2); + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2); let Asm64 = "$dst, $src0, $src1, $src2"; } diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 1e8a50dabf5..0269c5523b1 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -449,14 +449,16 @@ def S_CBRANCH_SCC1 : SOPP < >; } // End Uses = [SCC] +let Uses = [VCC] in { def S_CBRANCH_VCCZ : SOPP < - 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc), + 0x00000006, (ins sopp_brtarget:$simm16), "s_cbranch_vccz $simm16" >; def S_CBRANCH_VCCNZ : SOPP < - 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc), + 0x00000007, (ins sopp_brtarget:$simm16), "s_cbranch_vccnz $simm16" >; +} // End Uses = [VCC] let Uses = [EXEC] in { def S_CBRANCH_EXECZ : SOPP < diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index 31480ae6135..17fd3f0236a 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -192,7 +192,7 @@ def 
SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32, def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>; def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64, - (add SGPR_64, VCCReg, EXEC, FLAT_SCR) + (add SGPR_64, VCC, EXEC, FLAT_SCR) >; def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>; diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 5d00bdd6a9b..720b323f111 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -187,6 +187,21 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, } +// Copy MachineOperand with all flags except setting it as implicit. +static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) { + assert(!Orig.isImplicit()); + return MachineOperand::CreateReg(Orig.getReg(), + Orig.isDef(), + true, + Orig.isKill(), + Orig.isDead(), + Orig.isUndef(), + Orig.isEarlyClobber(), + Orig.getSubReg(), + Orig.isDebug(), + Orig.isInternalRead()); +} + bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = @@ -236,14 +251,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (TII->isVOPC(Op32)) { unsigned DstReg = MI.getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - // VOPC instructions can only write to the VCC register. We can't - // force them to use VCC here, because the register allocator has - // trouble with sequences like this, which cause the allocator to run - // out of registers if vreg0 and vreg1 belong to the VCCReg register - // class: - // vreg0 = VOPC; - // vreg1 = VOPC; - // S_AND_B64 vreg0, vreg1 + // VOPC instructions can only write to the VCC register. 
We can't + // force them to use VCC here, because this is only one register and + // cannot deal with sequences which would require multiple copies of + // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...) // // So, instead of forcing the instruction to write to VCC, we provide // a hint to the register allocator to use VCC and then we will run @@ -288,9 +299,19 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { Inst32.addOperand(*Src1); const MachineOperand *Src2 = - TII->getNamedOperand(MI, AMDGPU::OpName::src2); - if (Src2) - Inst32.addOperand(*Src2); + TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2) { + int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); + if (Op32Src2Idx != -1) { + Inst32.addOperand(*Src2); + } else { + // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is + // replaced with an implicit read of vcc. + assert(Src2->getReg() == AMDGPU::VCC && + "Unexpected missing register operand"); + Inst32.addOperand(copyRegOperandAsImplicit(*Src2)); + } + } ++NumInstructionsShrunk; MI.eraseFromParent();