From: Tom Stellard Date: Thu, 15 May 2014 14:41:50 +0000 (+0000) Subject: R600/SI: Use VALU instructions for i1 ops X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=bfffad69b25acf1c42a7891c873a947cb054d953;p=oota-llvm.git R600/SI: Use VALU instructions for i1 ops git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208885 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 1ede24fd976..ad26c9b84ff 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1408,6 +1408,16 @@ def V_MOV_I1 : InstSI < "", [(set i1:$dst, (imm:$src))] >; +def V_AND_I1 : InstSI < + (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "", + [(set i1:$dst, (and i1:$src0, i1:$src1))] +>; + +def V_OR_I1 : InstSI < + (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "", + [(set i1:$dst, (or i1:$src0, i1:$src1))] +>; + def LOAD_CONST : AMDGPUShaderInst < (outs GPRF32:$dst), (ins i32imm:$src), @@ -1680,16 +1690,6 @@ def : Pat < // SOP2 Patterns //===----------------------------------------------------------------------===// -def : Pat < - (i1 (and i1:$src0, i1:$src1)), - (S_AND_B64 $src0, $src1) ->; - -def : Pat < - (i1 (or i1:$src0, i1:$src1)), - (S_OR_B64 $src0, $src1) ->; - def : Pat < (i1 (xor i1:$src0, i1:$src1)), (S_XOR_B64 $src0, $src1) diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp index 766380ead58..738c90b30e5 100644 --- a/lib/Target/R600/SILowerI1Copies.cpp +++ b/lib/Target/R600/SILowerI1Copies.cpp @@ -73,6 +73,7 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( MF.getTarget().getInstrInfo()); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + std::vector<unsigned> I1Defs; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -84,10 +85,23 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { MachineInstr
&MI = *I; if (MI.getOpcode() == AMDGPU::V_MOV_I1) { + I1Defs.push_back(MI.getOperand(0).getReg()); MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32)); continue; } + if (MI.getOpcode() == AMDGPU::V_AND_I1) { + I1Defs.push_back(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(AMDGPU::V_AND_B32_e32)); + continue; + } + + if (MI.getOpcode() == AMDGPU::V_OR_I1) { + I1Defs.push_back(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(AMDGPU::V_OR_B32_e32)); + continue; + } + if (MI.getOpcode() != AMDGPU::COPY || !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) || !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg())) @@ -101,6 +115,7 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { if (DstRC == &AMDGPU::VReg_1RegClass && TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) { + I1Defs.push_back(MI.getOperand(0).getReg()); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64)) .addOperand(MI.getOperand(0)) .addImm(0) @@ -123,8 +138,11 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { .addImm(0); MI.eraseFromParent(); } - } } + + for (unsigned Reg : I1Defs) + MRI.setRegClass(Reg, &AMDGPU::VReg_32RegClass); + return false; } diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll index ad72732cab0..5bd95b79c0f 100644 --- a/test/CodeGen/R600/setcc.ll +++ b/test/CodeGen/R600/setcc.ll @@ -96,7 +96,9 @@ entry: ; R600-DAG: SETNE_INT ; SI: V_CMP_O_F32 ; SI: V_CMP_NEQ_F32 -; SI: S_AND_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_AND_B32_e32 define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp one float %a, %b @@ -128,7 +130,9 @@ entry: ; R600-DAG: SETNE_INT ; SI: V_CMP_U_F32 ; SI: V_CMP_EQ_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ueq float %a, %b @@ -142,7 +146,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; 
SI: V_CMP_GT_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ugt float %a, %b @@ -156,7 +162,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; SI: V_CMP_GE_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp uge float %a, %b @@ -170,7 +178,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; SI: V_CMP_LT_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ult float %a, %b @@ -184,7 +194,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; SI: V_CMP_LE_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ule float %a, %b diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll index c137125183a..54a33b30940 100644 --- a/test/CodeGen/R600/setcc64.ll +++ b/test/CodeGen/R600/setcc64.ll @@ -59,7 +59,9 @@ entry: ; FUNC-LABEL: @f64_one ; SI: V_CMP_O_F64 ; SI: V_CMP_NEQ_F64 -; SI: S_AND_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_AND_B32_e32 define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp one double %a, %b @@ -81,7 +83,9 @@ entry: ; FUNC-LABEL: @f64_ueq ; SI: V_CMP_U_F64 ; SI: V_CMP_EQ_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ueq double %a, %b @@ -93,7 +97,9 @@ entry: ; FUNC-LABEL: @f64_ugt ; SI: V_CMP_U_F64 ; SI: V_CMP_GT_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = 
fcmp ugt double %a, %b @@ -105,7 +111,9 @@ entry: ; FUNC-LABEL: @f64_uge ; SI: V_CMP_U_F64 ; SI: V_CMP_GE_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp uge double %a, %b @@ -117,7 +125,9 @@ entry: ; FUNC-LABEL: @f64_ult ; SI: V_CMP_U_F64 ; SI: V_CMP_LT_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ult double %a, %b @@ -129,7 +139,9 @@ entry: ; FUNC-LABEL: @f64_ule ; SI: V_CMP_U_F64 ; SI: V_CMP_LE_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ule double %a, %b