From: Tom Stellard Date: Thu, 17 Apr 2014 21:00:11 +0000 (+0000) Subject: R600/SI: Stop using i128 as the resource descriptor type X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221;p=oota-llvm.git R600/SI: Stop using i128 as the resource descriptor type Having i128 as a legal type complicates the legalization phase. v4i32 is already a legal type, so we will use that instead. This fixes several piglit tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206500 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 1c7f5f00c4f..c3ad46a4fab 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -42,9 +42,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass); addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::VSrc_128RegClass); + addRegisterClass(MVT::v4f32, &AMDGPU::VSrc_128RegClass); addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); @@ -78,8 +77,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADDC, MVT::i32, Legal); setOperationAction(ISD::ADDE, MVT::i32, Legal); - setOperationAction(ISD::BITCAST, MVT::i128, Legal); - // We need to custom lower vector stores from local memory setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); @@ -99,7 +96,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); - 
setOperationAction(ISD::STORE, MVT::i128, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); @@ -164,7 +160,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTruncStoreAction(MVT::i32, MVT::i16, Custom); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); - setTruncStoreAction(MVT::i128, MVT::i64, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -595,7 +590,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { AMDGPU::VGPR2, VT); case AMDGPUIntrinsic::SI_load_const: { SDValue Ops [] = { - ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(1), Op.getOperand(2) }; @@ -616,7 +611,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG); case AMDGPUIntrinsic::SI_vs_load_input: return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT, - ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } @@ -631,7 +626,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Ops [] = { Chain, - ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), Op.getOperand(5), @@ -799,26 +794,12 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } -SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op, - SelectionDAG &DAG) const { - - if (Op.getValueType() == MVT::i128) { - return Op; - } - - assert(Op.getOpcode() == ISD::UNDEF); - - return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128, - DAG.getConstant(0, MVT::i64), - DAG.getConstant(0, MVT::i64)); -} - SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const { return DAG.getNode(Opcode, 
SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), - ResourceDescriptorToi128(Op.getOperand(3), DAG), + Op.getOperand(3), Op.getOperand(4)); } diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index f3a52cb7f96..ff908310672 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -33,7 +33,6 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const; bool foldImm(SDValue &Operand, int32_t &Immediate, bool &ScalarSlotUsed) const; const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG, diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ac727399043..93de9aa4609 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -17,13 +17,13 @@ def SIadd64bit32bit : SDNode<"ISD::ADD", >; def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", - SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>, + SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>, [SDNPMayLoad, SDNPMemOperand] >; def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTypeProfile<0, 13, - [SDTCisVT<0, i128>, // rsrc(SGPR) + [SDTCisVT<0, v4i32>, // rsrc(SGPR) SDTCisVT<1, iAny>, // vdata(VGPR) SDTCisVT<2, i32>, // num_channels(imm) SDTCisVT<3, i32>, // vaddr(VGPR) @@ -41,13 +41,13 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", >; def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", - SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>, + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>, SDTCisVT<3, i32>]> >; class SDSample : SDNode , SDTCisVT<2, v32i8>, - SDTCisVT<3, i128>, SDTCisVT<4, i32>]> + SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]> >; def SIsample 
: SDSample<"AMDGPUISD::SAMPLE">; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e872bd8cc6f..036b5aa1d7a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1456,7 +1456,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), + (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) >; @@ -1479,34 +1479,34 @@ def : Pat < /* SIsample for simple 1D texture lookup */ def : Pat < - (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm), + (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SamplePattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleRectPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT), (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleArrayPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowArrayPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; @@ -1695,8 +1695,6 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; -def : BitConvert ; -def : BitConvert ; def : 
BitConvert ; def : BitConvert ; @@ -1865,19 +1863,19 @@ def : Ext32Pat ; // 1. Offset as 8bit DWORD immediate def : Pat < - (SIload_constant i128:$sbase, IMM8bitDWORD:$offset), + (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset), (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) >; // 2. Offset loaded in an 32bit SGPR def : Pat < - (SIload_constant i128:$sbase, imm:$offset), + (SIload_constant v4i32:$sbase, imm:$offset), (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; // 3. Offset in an 32Bit VGPR def : Pat < - (SIload_constant i128:$sbase, i32:$voff), + (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0) >; @@ -1979,7 +1977,6 @@ defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; -defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; @@ -2071,7 +2068,7 @@ multiclass MUBUF_Load_Dword { def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 0, imm:$glc, imm:$slc, imm:$tfe)), (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), @@ -2079,7 +2076,7 @@ multiclass MUBUF_Load_Dword ; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm, 1, 0, imm:$glc, imm:$slc, imm:$tfe)), (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), @@ -2087,7 +2084,7 @@ multiclass MUBUF_Load_Dword ; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 1, imm:$glc, imm:$slc, imm:$tfe)), (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), @@ -2095,7 +2092,7 @@ multiclass MUBUF_Load_Dword ; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset, + (vt 
(int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset, imm, 1, 1, imm:$glc, imm:$slc, imm:$tfe)), (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), @@ -2116,7 +2113,7 @@ defm : MUBUF_Load_Dword : Pat< - (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, + (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr, i32:$soffset, imm:$inst_offset, imm:$dfmt, imm:$nfmt, imm:$offen, imm:$idxen, imm:$glc, imm:$slc, imm:$tfe), @@ -2240,13 +2237,6 @@ def : Pat< // Miscellaneous Patterns //===----------------------------------------------------------------------===// -def : Pat < - (i64 (trunc i128:$x)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 (EXTRACT_SUBREG $x, sub0)), sub0), - (i32 (EXTRACT_SUBREG $x, sub1)), sub1) ->; - def : Pat < (i32 (trunc i64:$a)), (EXTRACT_SUBREG $a, sub0) diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 65cf311dd96..6d6d8b9bd84 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64Regs, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; @@ -183,14 +183,14 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> { let Size = 96; } -def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>; def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; //===----------------------------------------------------------------------===// -// [SV]Src_* register classes, can have either an immediate or an register 
+// [SV]Src_(32|64) register classes, can have either an immediate or an register //===----------------------------------------------------------------------===// def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; @@ -201,3 +201,9 @@ def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; +//===----------------------------------------------------------------------===// +// SGPR and VGPR register classes +//===----------------------------------------------------------------------===// + +def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, + (add VReg_128, SReg_128)>; diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index 9bf2caf217b..fb374ca42dd 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -35,7 +35,7 @@ class SITypeRewriter : public FunctionPass, static char ID; Module *Mod; Type *v16i8; - Type *i128; + Type *v4i32; public: SITypeRewriter() : FunctionPass(ID) { } @@ -56,7 +56,7 @@ char SITypeRewriter::ID = 0; bool SITypeRewriter::doInitialization(Module &M) { Mod = &M; v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16); - i128 = Type::getIntNTy(M.getContext(), 128); + v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4); return false; } @@ -84,7 +84,8 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) { Type *ElemTy = PtrTy->getPointerElementType(); IRBuilder<> Builder(&I); if (ElemTy == v16i8) { - Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2)); + Value *BitCast = Builder.CreateBitCast(Ptr, + PointerType::get(v4i32,PtrTy->getPointerAddressSpace())); LoadInst *Load = Builder.CreateLoad(BitCast); SmallVector , 8> MD; I.getAllMetadataOtherThanDebugLoc(MD); @@ -99,6 +100,7 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) { void SITypeRewriter::visitCallInst(CallInst &I) { IRBuilder<> Builder(&I); + 
SmallVector Args; SmallVector Types; bool NeedToReplace = false; @@ -107,10 +109,10 @@ void SITypeRewriter::visitCallInst(CallInst &I) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); if (Arg->getType() == v16i8) { - Args.push_back(Builder.CreateBitCast(Arg, i128)); - Types.push_back(i128); + Args.push_back(Builder.CreateBitCast(Arg, v4i32)); + Types.push_back(v4i32); NeedToReplace = true; - Name = Name + ".i128"; + Name = Name + ".v4i32"; } else if (Arg->getType()->isVectorTy() && Arg->getType()->getVectorNumElements() == 1 && Arg->getType()->getVectorElementType() == @@ -144,12 +146,12 @@ void SITypeRewriter::visitCallInst(CallInst &I) { void SITypeRewriter::visitBitCast(BitCastInst &I) { IRBuilder<> Builder(&I); - if (I.getDestTy() != i128) { + if (I.getDestTy() != v4i32) { return; } if (BitCastInst *Op = dyn_cast(I.getOperand(0))) { - if (Op->getSrcTy() == i128) { + if (Op->getSrcTy() == v4i32) { I.replaceAllUsesWith(Op->getOperand(0)); I.eraseFromParent(); } diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index a3c5331f3f4..b29ad7e3ab0 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -297,3 +297,29 @@ entry: } attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +; When i128 was a legal type this program generated cannot select errors: + +; FUNC-LABEL: @i128-const-store +; FIXME: We should be able to do this with one store instruction +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; SI: BUFFER_STORE_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +define void @i128-const-store(i32 addrspace(1)* %out) { +entry: + store i32 1, i32 addrspace(1)* %out, align 4 + %arrayidx2 = getelementptr inbounds
i32 addrspace(1)* %out, i64 1 + store i32 1, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2 + store i32 2, i32 addrspace(1)* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3 + store i32 2, i32 addrspace(1)* %arrayidx6, align 4 + ret void +}