From f57d692c11f0ff6e9c45d2c48c5f362f4c575cf7 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 12 Sep 2013 23:44:53 +0000 Subject: [PATCH] R600: Move code handling literal folding into R600ISelLowering. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190644 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 105 ------------------------- lib/Target/R600/R600ISelLowering.cpp | 63 ++++++++++++++- test/CodeGen/R600/literals.ll | 16 ++++ 3 files changed, 75 insertions(+), 109 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 85e1422ac9e..95037ba6a9e 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -193,8 +193,6 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { } SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { - const R600InstrInfo *TII = - static_cast(TM.getInstrInfo()); unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { return NULL; // Already selected. @@ -310,109 +308,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } - - case ISD::ConstantFP: - case ISD::Constant: { - const AMDGPUSubtarget &ST = TM.getSubtarget(); - // XXX: Custom immediate lowering not implemented yet. Instead we use - // pseudo instructions defined in SIInstructions.td - if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - break; - } - - uint64_t ImmValue = 0; - unsigned ImmReg = AMDGPU::ALU_LITERAL_X; - - if (N->getOpcode() == ISD::ConstantFP) { - // XXX: 64-bit Immediates not supported yet - assert(N->getValueType(0) != MVT::f64); - - ConstantFPSDNode *C = dyn_cast(N); - APFloat Value = C->getValueAPF(); - float FloatValue = Value.convertToFloat(); - if (FloatValue == 0.0) { - ImmReg = AMDGPU::ZERO; - } else if (FloatValue == 0.5) { - ImmReg = AMDGPU::HALF; - } else if (FloatValue == 1.0) { - ImmReg = AMDGPU::ONE; - } else { - ImmValue = Value.bitcastToAPInt().getZExtValue(); - } - } else { - // XXX: 64-bit Immediates not supported yet - assert(N->getValueType(0) != MVT::i64); - - ConstantSDNode *C = dyn_cast(N); - if (C->getZExtValue() == 0) { - ImmReg = AMDGPU::ZERO; - } else if (C->getZExtValue() == 1) { - ImmReg = AMDGPU::ONE_INT; - } else { - ImmValue = C->getZExtValue(); - } - } - - for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use); - Use != SDNode::use_end(); Use = Next) { - Next = llvm::next(Use); - std::vector Ops; - for (unsigned i = 0; i < Use->getNumOperands(); ++i) { - Ops.push_back(Use->getOperand(i)); - } - - if (!Use->isMachineOpcode()) { - if (ImmReg == AMDGPU::ALU_LITERAL_X) { - // We can only use literal constants (e.g. AMDGPU::ZERO, - // AMDGPU::ONE, etc) in machine opcodes. - continue; - } - } else { - switch(Use->getMachineOpcode()) { - case AMDGPU::REG_SEQUENCE: break; - default: - if (!TII->isALUInstr(Use->getMachineOpcode()) || - (TII->get(Use->getMachineOpcode()).TSFlags & - R600_InstFlag::VECTOR)) { - continue; - } - } - - // Check that we aren't already using an immediate. - // XXX: It's possible for an instruction to have more than one - // immediate operand, but this is not supported yet. - if (ImmReg == AMDGPU::ALU_LITERAL_X) { - int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), - AMDGPU::OpName::literal); - if (ImmIdx == -1) { - continue; - } - - if (TII->getOperandIdx(Use->getMachineOpcode(), - AMDGPU::OpName::dst) != -1) { - // subtract one from ImmIdx, because the DST operand is usually index - // 0 for MachineInstrs, but we have no DST in the Ops vector. - ImmIdx--; - } - ConstantSDNode *C = dyn_cast(Use->getOperand(ImmIdx)); - assert(C); - - if (C->getZExtValue() != 0) { - // This instruction is already using an immediate. - continue; - } - - // Set the immediate value - Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32); - } - } - // Set the immediate register - Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32); - - CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands()); - } - break; - } } SDNode *Result = SelectCode(N); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a7b7a8409b9..5db793737e0 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1632,7 +1632,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, static bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, - SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) { + SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) { const R600InstrInfo *TII = static_cast(DAG.getTarget().getInstrInfo()); if (!Src.isMachineOpcode()) @@ -1705,6 +1705,51 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32); return true; } + case AMDGPU::MOV_IMM_I32: + case AMDGPU::MOV_IMM_F32: { + unsigned ImmReg = AMDGPU::ALU_LITERAL_X; + uint64_t ImmValue = 0; + + + if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) { + ConstantFPSDNode *FPC = dyn_cast(Src.getOperand(0)); + float FloatValue = FPC->getValueAPF().convertToFloat(); + if (FloatValue == 0.0) { + ImmReg = AMDGPU::ZERO; + } else if (FloatValue == 0.5) { + ImmReg = AMDGPU::HALF; + } else if (FloatValue == 1.0) { + ImmReg = AMDGPU::ONE; + } else { + ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); + } + } else { + ConstantSDNode *C = dyn_cast(Src.getOperand(0)); + uint64_t Value = C->getZExtValue(); + if (Value == 0) { + ImmReg = AMDGPU::ZERO; + } else if (Value == 1) { + ImmReg = AMDGPU::ONE_INT; + } else { + ImmValue = Value; + } + } + + // Check that we aren't already using an immediate. + // XXX: It's possible for an instruction to have more than one + // immediate operand, but this is not supported yet. + if (ImmReg == AMDGPU::ALU_LITERAL_X) { + if (!Imm.getNode()) + return false; + ConstantSDNode *C = dyn_cast(Imm); + assert(C); + if (C->getZExtValue()) + return false; + Imm = DAG.getTargetConstant(ImmValue, MVT::i32); + } + Src = DAG.getRegister(ImmReg, MVT::i32); + return true; + } default: return false; } @@ -1768,7 +1813,13 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, if (HasDst) SelIdx--; SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; - if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG)) + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); + } + } else if (Opcode == AMDGPU::REG_SEQUENCE) { + for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { + SDValue &Src = Ops[i]; + if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } } else { @@ -1798,10 +1849,14 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); - if (HasDst) + int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal); + if (HasDst) { SelIdx--; + ImmIdx--; + } SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; - if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG)) + SDValue &Imm = Ops[ImmIdx]; + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } } diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll index e5bdbc43c24..47191e0a27f 100644 --- a/test/CodeGen/R600/literals.ll +++ b/test/CodeGen/R600/literals.ll @@ -46,3 +46,19 @@ entry: store <4 x i32> , <4 x i32> addrspace(1)* %out ret void } + +; CHECK: @inline_literal_dot4 +; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0 +; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0 +; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0 +; CHECK-NEXT: DOT4 * T[[GPR]].W (MASKED), 1.0 +define void @inline_literal_dot4(float addrspace(1)* %out) { +entry: + %0 = call float @llvm.AMDGPU.dp4(<4 x float> , <4 x float> ) + store float %0, float addrspace(1)* %out + ret void +} + +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +attributes #1 = { readnone } -- 2.34.1