From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:10 +0000 (+0000) Subject: [NVPTX] Calling conventions fix X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=ac78a0645ddd2046fb66237ba4cfadffa2d367d7;p=oota-llvm.git [NVPTX] Calling conventions fix Fix ABI handling for function returning bool -- use st.param.b32 to return the value and use ld.param.b32 in caller to load the return value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185177 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8877d131eae..0ff1a985be0 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1207,7 +1207,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, sz = 8; SmallVector LoadRetVTs; - if (sz < 16) { + EVT TheLoadType = VTs[i]; + if (retTy->isIntegerTy() && + TD->getTypeAllocSizeInBits(retTy) < 32) { + // This is for integer types only, and specifically not for + // aggregates. + LoadRetVTs.push_back(MVT::i32); + TheLoadType = MVT::i32; + } else if (sz < 16) { // If loading i1/i8 result, generate // load i8 (-> i16) // trunc i16 to i1/i8 @@ -1225,7 +1232,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], - LoadRetOps.size(), VTs[i], MachinePointerInfo()); + LoadRetOps.size(), TheLoadType, MachinePointerInfo()); Chain = retval.getValue(1); InFlag = retval.getValue(2); SDValue Ret0 = retval.getValue(0); @@ -1798,7 +1805,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDLoc dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); - const Type *RetTy = F->getReturnType(); + Type *RetTy = F->getReturnType(); const DataLayout *TD = getDataLayout(); bool isABI = (nvptxSubtarget.getSmVersion() >= 20); @@ -1806,14 +1813,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (!isABI) return Chain; - if (const VectorType *VTy = dyn_cast(RetTy)) { + if (VectorType *VTy = dyn_cast(RetTy)) { // If we have a vector type, the OutVals array will be the scalarized // components and we have combine them into 1 or more vector stores. unsigned NumElts = VTy->getNumElements(); assert(NumElts == Outs.size() && "Bad scalarization of return value"); // const_cast can be removed in later LLVM versions - EVT EltVT = getValueType(const_cast(RetTy)).getVectorElementType(); + EVT EltVT = getValueType(RetTy).getVectorElementType(); bool NeedExtend = false; if (EltVT.getSizeInBits() < 16) NeedExtend = true; @@ -1923,34 +1930,43 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector ValVTs; // const_cast is necessary since we are still using an LLVM version from // before the type system re-write. - ComputePTXValueVTs(*this, const_cast(RetTy), ValVTs); + ComputePTXValueVTs(*this, RetTy, ValVTs); assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); - unsigned sizesofar = 0; + unsigned SizeSoFar = 0; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; - EVT theValType = theVal.getValueType(); + EVT TheValType = theVal.getValueType(); unsigned numElems = 1; - if (theValType.isVector()) - numElems = theValType.getVectorNumElements(); + if (TheValType.isVector()) + numElems = TheValType.getVectorNumElements(); for (unsigned j = 0, je = numElems; j != je; ++j) { - SDValue tmpval = theVal; - if (theValType.isVector()) - tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - theValType.getVectorElementType(), tmpval, + SDValue TmpVal = theVal; + if (TheValType.isVector()) + TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + TheValType.getVectorElementType(), TmpVal, DAG.getIntPtrConstant(j)); - EVT theStoreType = tmpval.getValueType(); - if (theStoreType.getSizeInBits() < 8) - tmpval = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, tmpval); - SDValue Ops[] = { Chain, DAG.getConstant(sizesofar, MVT::i32), tmpval }; + EVT TheStoreType = ValVTs[i]; + if (RetTy->isIntegerTy() && + TD->getTypeAllocSizeInBits(RetTy) < 32) { + // The following zero-extension is for integer types only, and + // specifically not for aggregates. + TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); + TheStoreType = MVT::i32; + } + else if (TmpVal.getValueType().getSizeInBits() < 16) + TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); + + SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, - DAG.getVTList(MVT::Other), &Ops[0], 3, - ValVTs[i], MachinePointerInfo()); - if (theValType.isVector()) - sizesofar += - ValVTs[i].getVectorElementType().getStoreSizeInBits() / 8; + DAG.getVTList(MVT::Other), &Ops[0], + 3, TheStoreType, + MachinePointerInfo()); + if(TheValType.isVector()) + SizeSoFar += + TheStoreType.getVectorElementType().getStoreSizeInBits() / 8; else - sizesofar += ValVTs[i].getStoreSizeInBits() / 8; + SizeSoFar += TheStoreType.getStoreSizeInBits()/8; } } } diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll index e929f24ddba..c595f215f6f 100644 --- a/test/CodeGen/NVPTX/compare-int.ll +++ b/test/CodeGen/NVPTX/compare-int.ll @@ -195,7 +195,7 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) { define i16 @icmp_eq_i16(i16 %a, i16 %b) { ; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i16 %a, %b %ret = zext i1 %cmp to i16 @@ -204,7 +204,7 @@ define i16 @icmp_eq_i16(i16 %a, i16 %b) { define i16 @icmp_ne_i16(i16 %a, i16 %b) { ; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i16 %a, %b %ret = zext i1 %cmp to i16 @@ -213,7 +213,7 @@ define i16 @icmp_ne_i16(i16 %a, i16 %b) { define i16 @icmp_ugt_i16(i16 %a, i16 %b) { ; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ugt i16 %a, %b %ret = zext i1 %cmp to i16 @@ -222,7 +222,7 @@ define i16 @icmp_ugt_i16(i16 %a, i16 %b) { define i16 @icmp_uge_i16(i16 %a, i16 %b) { ; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp uge i16 %a, %b %ret = zext i1 %cmp to i16 @@ -231,7 +231,7 @@ define i16 @icmp_uge_i16(i16 %a, i16 %b) { define i16 @icmp_ult_i16(i16 %a, i16 %b) { ; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ult i16 %a, %b %ret = zext i1 %cmp to i16 @@ -240,7 +240,7 @@ define i16 @icmp_ult_i16(i16 %a, i16 %b) { define i16 @icmp_ule_i16(i16 %a, i16 %b) { ; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ule i16 %a, %b %ret = zext i1 %cmp to i16 @@ -249,7 +249,7 @@ define i16 @icmp_ule_i16(i16 %a, i16 %b) { define i16 @icmp_sgt_i16(i16 %a, i16 %b) { ; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sgt i16 %a, %b %ret = zext i1 %cmp to i16 @@ -258,7 +258,7 @@ define i16 @icmp_sgt_i16(i16 %a, i16 %b) { define i16 @icmp_sge_i16(i16 %a, i16 %b) { ; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sge i16 %a, %b %ret = zext i1 %cmp to i16 @@ -267,7 +267,7 @@ define i16 @icmp_sge_i16(i16 %a, i16 %b) { define i16 @icmp_slt_i16(i16 %a, i16 %b) { ; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp slt i16 %a, %b %ret = zext i1 %cmp to i16 @@ -276,7 +276,7 @@ define i16 @icmp_slt_i16(i16 %a, i16 %b) { define i16 @icmp_sle_i16(i16 %a, i16 %b) { ; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sle i16 %a, %b %ret = zext i1 %cmp to i16 @@ -289,7 +289,7 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) { define i8 @icmp_eq_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i8 %a, %b %ret = zext i1 %cmp to i8 @@ -299,7 +299,7 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) { define i8 @icmp_ne_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i8 %a, %b %ret = zext i1 %cmp to i8 @@ -309,7 +309,7 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) { define i8 @icmp_ugt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ugt i8 %a, %b %ret = zext i1 %cmp to i8 @@ -319,7 +319,7 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) { define i8 @icmp_uge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp uge i8 %a, %b %ret = zext i1 %cmp to i8 @@ -329,7 +329,7 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) { define i8 @icmp_ult_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ult i8 %a, %b %ret = zext i1 %cmp to i8 @@ -339,7 +339,7 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) { define i8 @icmp_ule_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ule i8 %a, %b %ret = zext i1 %cmp to i8 @@ -349,7 +349,7 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) { define i8 @icmp_sgt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sgt i8 %a, %b %ret = zext i1 %cmp to i8 @@ -359,7 +359,7 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) { define i8 @icmp_sge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sge i8 %a, %b %ret = zext i1 %cmp to i8 @@ -369,7 +369,7 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) { define i8 @icmp_slt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp slt i8 %a, %b %ret = zext i1 %cmp to i8 @@ -379,7 +379,7 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) { define i8 @icmp_sle_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sle i8 %a, %b %ret = zext i1 %cmp to i8 diff --git a/test/CodeGen/NVPTX/convert-int-sm20.ll b/test/CodeGen/NVPTX/convert-int-sm20.ll index fad240e03d2..227cd31e11b 100644 --- a/test/CodeGen/NVPTX/convert-int-sm20.ll +++ b/test/CodeGen/NVPTX/convert-int-sm20.ll @@ -8,16 +8,16 @@ ; i16 define i16 @cvt_i16_i32(i32 %x) { -; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}] -; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]] +; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] ; CHECK: ret %a = trunc i32 %x to i16 ret i16 %a } define i16 @cvt_i16_i64(i64 %x) { -; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}] -; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]] +; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] ; CHECK: ret %a = trunc i64 %x to i16 ret i16 %a diff --git a/test/CodeGen/NVPTX/i8-param.ll b/test/CodeGen/NVPTX/i8-param.ll new file mode 100644 index 00000000000..9a253ff6c99 --- /dev/null +++ b/test/CodeGen/NVPTX/i8-param.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +; CHECK: .visible .func (.param .b32 func_retval0) callee +define i8 @callee(i8 %a) { +; CHECK: ld.param.u8 + %ret = add i8 %a, 42 +; CHECK: st.param.b32 + ret i8 %ret +} + +; CHECK: .visible .func caller +define void @caller(i8* %a) { +; CHECK: ld.u8 + %val = load i8* %a + %ret = tail call i8 @callee(i8 %val) +; CHECK: ld.param.b32 + store i8 %ret, i8* %a + ret void +} + + \ No newline at end of file diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll index 204ae7b1fb5..133ef09afdb 100644 --- a/test/CodeGen/NVPTX/ld-addrspace.ll +++ b/test/CodeGen/NVPTX/ld-addrspace.ll @@ -4,27 +4,27 @@ ;; i8 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) { -; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.global.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(1)* %ptr ret i8 %a } define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) { -; PTX32: ld.shared.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.shared.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(3)* %ptr ret i8 %a } define i8 @ld_local_i8(i8 addrspace(5)* %ptr) { -; PTX32: ld.local.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.local.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(5)* %ptr ret i8 %a @@ -32,27 +32,27 @@ define i8 @ld_local_i8(i8 addrspace(5)* %ptr) { ;; i16 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) { -; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(1)* %ptr ret i16 %a } define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) { -; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(3)* %ptr ret i16 %a } define i16 @ld_local_i16(i16 addrspace(5)* %ptr) { -; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(5)* %ptr ret i16 %a diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll index f811a371917..3728268c24d 100644 --- a/test/CodeGen/NVPTX/ld-generic.ll +++ b/test/CodeGen/NVPTX/ld-generic.ll @@ -4,9 +4,9 @@ ;; i8 define i8 @ld_global_i8(i8 addrspace(0)* %ptr) { -; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(0)* %ptr ret i8 %a @@ -14,9 +14,9 @@ define i8 @ld_global_i8(i8 addrspace(0)* %ptr) { ;; i16 define i16 @ld_global_i16(i16 addrspace(0)* %ptr) { -; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(0)* %ptr ret i16 %a