From: Tom Stellard Date: Wed, 11 Feb 2015 00:34:35 +0000 (+0000) Subject: R600/SI: Store immediate offsets > 12-bits in soffset X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9f5d593c1f61db2d8a5fc52835435570d26f1118;p=oota-llvm.git R600/SI: Store immediate offsets > 12-bits in soffset This will save us from having to extend these offsets to 64-bits and storing them in a pair of vgprs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228776 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index ebe34431da2..ac05e4ae2ce 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -925,26 +925,32 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue N1 = Addr.getOperand(1); ConstantSDNode *C1 = cast(N1); - if (isLegalMUBUFImmOffset(C1)) { - - if (N0.getOpcode() == ISD::ADD) { - // (add (add N2, N3), C1) -> addr64 - SDValue N2 = N0.getOperand(0); - SDValue N3 = N0.getOperand(1); - Addr64 = CurDAG->getTargetConstant(1, MVT::i1); - Ptr = N2; - VAddr = N3; - Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16); - return; - } + if (N0.getOpcode() == ISD::ADD) { + // (add (add N2, N3), C1) -> addr64 + SDValue N2 = N0.getOperand(0); + SDValue N3 = N0.getOperand(1); + Addr64 = CurDAG->getTargetConstant(1, MVT::i1); + Ptr = N2; + VAddr = N3; + } else { // (add N0, C1) -> offset VAddr = CurDAG->getTargetConstant(0, MVT::i32); Ptr = N0; - Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16); + } + + if (isLegalMUBUFImmOffset(C1)) { + Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16); + return; + } else if (isUInt<32>(C1->getZExtValue())) { + // Illegal offset, store it in soffset. + Offset = CurDAG->getTargetConstant(0, MVT::i16); + SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, + CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i32)), 0); return; } } + if (Addr.getOpcode() == ISD::ADD) { // (add N0, N1) -> addr64 SDValue N0 = Addr.getOperand(0); diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll index 9c2a17ce04f..e43c83d0b40 100644 --- a/test/CodeGen/R600/mubuf.ll +++ b/test/CodeGen/R600/mubuf.ll @@ -30,7 +30,8 @@ entry: ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: {{^}}mubuf_load2: -; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0 +; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 +; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 1024 @@ -85,12 +86,6 @@ main_body: ret void } -declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3 -declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) - -attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" } -attributes #3 = { nounwind readonly } - ;;;==========================================================================;;; ;;; MUBUF STORE TESTS ;;;==========================================================================;;; @@ -118,7 +113,8 @@ entry: ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: {{^}}mubuf_store2: -; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0 +; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 +; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0 define void @mubuf_store2(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 1024 @@ -154,7 +150,8 @@ define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 { } ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset: -; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 +; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 +; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 { %out.gep = getelementptr i32 addrspace(1)* %out, i32 32768 store i32 99, i32 addrspace(1)* %out.gep, align 4 @@ -169,3 +166,9 @@ define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 { store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void } + +declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3 +declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) + +attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" } +attributes #3 = { nounwind readonly }