From bd949eea85d485f5ec5917de0a3a9d7255568116 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 13 Aug 2014 18:14:11 +0000 Subject: [PATCH] R600: Correctly set the src value offset for scalarized kernel args This for some reason fixes v1i64 kernel arguments on pre-SI. This currently breaks some other cases in the kernel-args.ll test for R600, but I'm not particularly confident in the new output. VTX_READ_* are not used for some of the scalarized cases, and the code reading from the constant buffer doesn't make much sense to me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215564 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 40 ++++++++++++++++++++-------- test/CodeGen/R600/kernel-args.ll | 18 +++++++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 8877cc85504..3bc8cb99c71 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1705,8 +1705,13 @@ SDValue R600TargetLowering::LowerFormalArguments( for (unsigned i = 0, e = Ins.size(); i < e; ++i) { CCValAssign &VA = ArgLocs[i]; - EVT VT = Ins[i].VT; - EVT MemVT = LocalIns[i].VT; + const ISD::InputArg &In = Ins[i]; + EVT VT = In.VT; + EVT MemVT = VA.getLocVT(); + if (!VT.isVector() && MemVT.isVector()) { + // Get load source type if scalarized. + MemVT = MemVT.getVectorElementType(); + } if (ShaderType != ShaderType::COMPUTE) { unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass); @@ -1716,7 +1721,7 @@ SDValue R600TargetLowering::LowerFormalArguments( } PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::CONSTANT_BUFFER_0); + AMDGPUAS::CONSTANT_BUFFER_0); // i64 isn't a legal type, so the register type used ends up as i32, which // isn't expected here. It attempts to create this sextload, but it ends up @@ -1725,15 +1730,28 @@ SDValue R600TargetLowering::LowerFormalArguments( // The first 36 bytes of the input buffer contains information about // thread group and global sizes. + ISD::LoadExtType Ext = ISD::NON_EXTLOAD; + if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) { + // FIXME: This should really check the extload type, but the handling of + // extload vector parameters seems to be broken. + + // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + Ext = ISD::SEXTLOAD; + } + + // Compute the offset from the value. + // XXX - I think PartOffset should give you this, but it seems to give the + // size of the register which isn't useful. + + unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset(); + unsigned PartOffset = VA.getLocMemOffset(); - // FIXME: This should really check the extload type, but the handling of - // extload vecto parameters seems to be broken. - //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; - ISD::LoadExtType Ext = ISD::SEXTLOAD; - SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain, - DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), - MachinePointerInfo(UndefValue::get(PtrTy)), - MemVT, false, false, false, 4); + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); + SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain, + DAG.getConstant(36 + PartOffset, MVT::i32), + DAG.getUNDEF(MVT::i32), + PtrInfo, + MemVT, false, true, true, 4); // 4 is the preferred alignment for the CONSTANT memory space. InVals.push_back(Arg); diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll index 6fc69792fd3..d0750de8111 100644 --- a/test/CodeGen/R600/kernel-args.ll +++ b/test/CodeGen/R600/kernel-args.ll @@ -453,3 +453,21 @@ entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @kernel_arg_i64 +; SI: S_LOAD_DWORDX2 +; SI: S_LOAD_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { + store i64 %a, i64 addrspace(1)* %out, align 8 + ret void +} + +; XFUNC-LABEL: @kernel_arg_v1i64 +; XSI: S_LOAD_DWORDX2 +; XSI: S_LOAD_DWORDX2 +; XSI: BUFFER_STORE_DWORDX2 +; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind { +; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8 +; ret void +; } -- 2.34.1