From: Tom Stellard Date: Thu, 3 Apr 2014 20:19:27 +0000 (+0000) Subject: R600/SI: Lower 64-bit immediates using REG_SEQUENCE X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=50c16fb65c180cd27888a58cbfe540feb638b221;p=oota-llvm.git R600/SI: Lower 64-bit immediates using REG_SEQUENCE git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205561 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index e8c5f5b2dce..1661056f297 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -48,6 +48,7 @@ public: virtual void PostprocessISelDAG(); private: + bool isInlineImmediate(SDNode *N) const; inline SDValue getSmallIPtrImm(unsigned Imm); bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, const R600InstrInfo *TII); @@ -103,6 +104,12 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { } +bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const { + const SITargetLowering *TL + = static_cast<const SITargetLowering *>(getTargetLowering()); + return TL->analyzeImmediate(N) == 0; +} + /// \brief Determine the register class for \p OpNo /// \returns The register class of the virtual register that will be used for /// the given operand number \OpNo or NULL if the register class cannot be @@ -357,6 +364,37 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } + + case ISD::Constant: + case ISD::ConstantFP: { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || + N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) + break; + + uint64_t Imm; + if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) + Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); + else { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + assert(C); + Imm = C->getZExtValue(); + } 
+ + SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32, + CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32)); + SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32, + CurDAG->getConstant(Imm >> 32, MVT::i32)); + const SDValue Ops[] = { + CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), + SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32) + }; + + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), + N->getValueType(0), Ops); + } + case AMDGPUISD::REGISTER_LOAD: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0b5541161ff..29aaa9e76b6 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1029,9 +1029,11 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { return -1; } Imm.I = Node->getSExtValue(); - } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) + } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) { + if (N->getValueType(0) != MVT::f32) + return -1; Imm.F = Node->getValueAPF().convertToFloat(); - else + } else return -1; // It isn't an immediate if ((Imm.I >= -16 && Imm.I <= 64) || diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index e05ab65e8b6..4e4ad2e4f34 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -117,8 +117,7 @@ def mubuf_vaddr_offset : PatFrag< >; class InlineImm : PatLeaf <(vt imm), [{ - return - (*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0; + return isInlineImmediate(N); }]>; class SGPRImm : PatLeaf:$imm) >; -// i64 immediates aren't supported in hardware, split it into two 32bit values -def : Pat < - (i64 imm:$imm), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_MOV_B32 (i32 
(LO32 imm:$imm))), sub0), - (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1) ->; - -def : Pat < - (f64 fpimm:$imm), - (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0), - (V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1) ->; - /********** ===================== **********/ /********** Interpolation Paterns **********/ /********** ===================== **********/ diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll index 5c5ee7e9091..6c2a9034b87 100644 --- a/test/CodeGen/R600/fconst64.ll +++ b/test/CodeGen/R600/fconst64.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fconst_f64 -; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00 -; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00 +; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 1075052544 +; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0 define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) { %r1 = load double addrspace(1)* %in diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll index 8a759dc21c0..43c06ebbdb4 100644 --- a/test/CodeGen/R600/trunc.ll +++ b/test/CodeGen/R600/trunc.ll @@ -31,8 +31,9 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) { ; SI-LABEL: @trunc_shl_i64: ; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, -; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]], -; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2 +; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]], +; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2 +; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]] ; SI: BUFFER_STORE_DWORD v[[LO_VREG]], define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) { %aa = add i64 %a, 234 ; Prevent shrinking store.