From: Vincent Lejeune Date: Sat, 16 Nov 2013 16:24:41 +0000 (+0000) Subject: R600: Make dot_4 instructions predicable X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=411079785388290738049dd099bff8755e6a2c8d;p=oota-llvm.git R600: Make dot_4 instructions predicable git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194927 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 8436d5ffed6..1f4741634e1 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -1009,6 +1009,20 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI, return true; } + if (MI->getOpcode() == AMDGPU::DOT_4) { + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X)) + .setReg(Pred[2].getReg()); + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y)) + .setReg(Pred[2].getReg()); + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z)) + .setReg(Pred[2].getReg()); + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W)) + .setReg(Pred[2].getReg()); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); + return true; + } + if (PIdx != -1) { MachineOperand &PMO = MI->getOperand(PIdx); PMO.setReg(Pred[2].getReg()); @@ -1217,6 +1231,11 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( AMDGPU::OpName::src1_sel, }; + MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), + getSlotedOps(AMDGPU::OpName::pred_sel, Slot))); + MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel)) + .setReg(MO.getReg()); + for (unsigned i = 0; i < 14; i++) { MachineOperand &MO = MI->getOperand( getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); diff --git a/test/CodeGen/R600/predicate-dp4.ll b/test/CodeGen/R600/predicate-dp4.ll new file mode 100644 index 00000000000..e48d6a7aa9a --- /dev/null +++ b/test/CodeGen/R600/predicate-dp4.ll @@ -0,0 +1,27 @@ 
+;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
+; Verifies that the dp4 call in the conditionally executed IF block is emitted as a DOT4 carrying Pred_sel_one.
+; CHECK-LABEL: @main
+; CHECK: PRED_SETE_INT * Pred,
+; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one
+define void @main(<4 x float> inreg) #0 {
+main_body:
+  %1 = extractelement <4 x float> %0, i32 0
+  %2 = bitcast float %1 to i32
+  %3 = icmp eq i32 %2, 0
+  br i1 %3, label %IF, label %ENDIF
+
+IF: ; preds = %main_body
+  %4 = call float @llvm.AMDGPU.dp4(<4 x float> %0, <4 x float> %0)
+  br label %ENDIF
+
+ENDIF: ; preds = %IF, %main_body
+  %5 = phi float [%4, %IF], [0.000000e+00, %main_body]
+  %6 = insertelement <4 x float> undef, float %5, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %6, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+attributes #1 = { readnone }
+attributes #0 = { "ShaderType"="0" }