From 104dab3e04decd662a9016d6dba18972bcb80b73 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Thu, 16 Jul 2015 19:40:09 +0000
Subject: [PATCH] AMDGPU/SI: Negative offsets aren't allowed in MUBUF's vaddr
 operand

Reviewers: arsenm

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D11226

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242434 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 15 ++++---
 test/CodeGen/AMDGPU/private-memory.ll    |  2 +-
 test/CodeGen/AMDGPU/scratch-buffer.ll    | 51 +++++++++++++++++++-----
 3 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 37b77d778d9..f0f38c193ff 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1095,13 +1095,16 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
 
   // (add n0, c1)
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
-    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
-
-    if (isLegalMUBUFImmOffset(C1)) {
-      VAddr = Addr.getOperand(0);
-      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
-      return true;
+    // Offsets in vaddr must be positive.
+    if (CurDAG->SignBitIsZero(N0)) {
+      ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+      if (isLegalMUBUFImmOffset(C1)) {
+        VAddr = N0;
+        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+        return true;
+      }
     }
   }
 
diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll
index 1c562978050..645dc04f442 100644
--- a/test/CodeGen/AMDGPU/private-memory.ll
+++ b/test/CodeGen/AMDGPU/private-memory.ll
@@ -298,7 +298,7 @@ entry:
 ; FUNC-LABEL: ptrtoint:
 ; SI-NOT: ds_write
 ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
-; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
+; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
 define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
   %alloca = alloca [16 x i32]
   %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
diff --git a/test/CodeGen/AMDGPU/scratch-buffer.ll b/test/CodeGen/AMDGPU/scratch-buffer.ll
index 56088718ada..268869daaa3 100644
--- a/test/CodeGen/AMDGPU/scratch-buffer.ll
+++ b/test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck --check-prefix=GCN --check-prefix=DEFAULT-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GCN --check-prefix=DEFAULT-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+huge-scratch-buffer -mcpu=SI < %s | FileCheck --check-prefix=GCN --check-prefix=HUGE-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+huge-scratch-buffer -mcpu=tonga < %s | FileCheck --check-prefix=GCN --check-prefix=HUGE-SCRATCH %s
 
 ; When a frame index offset is more than 12-bits, make sure we don't store
 ; it in mubuf's offset field.
@@ -8,11 +10,11 @@
 ; for both stores. This register is allocated by the register scavenger, so we
 ; should be able to reuse the same register for each scratch buffer access.
 
-; CHECK-LABEL: {{^}}legal_offset_fi:
-; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+; GCN-LABEL: {{^}}legal_offset_fi:
+; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; GCN: v_mov_b32_e32 [[OFFSET]], 0x8000
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
 
 define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
 entry:
@@ -47,10 +49,10 @@ done:
   ret void
 }
 
-; CHECK-LABEL: {{^}}legal_offset_fi_offset
-; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+; GCN-LABEL: {{^}}legal_offset_fi_offset
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
 
 define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
 entry:
@@ -85,3 +87,30 @@ done:
   ret void
 }
 
+; GCN-LABEL: @neg_vaddr_offset
+; We can't prove %offset is positive, so we must do the computation with the
+; immediate in an add instruction instead of folding offset and the immediate into
+; the store instruction.
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
+define void @neg_vaddr_offset(i32 %offset) {
+entry:
+  %array = alloca [8192 x i32]
+  %ptr_offset = add i32 %offset, 4
+  %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
+  store i32 0, i32* %ptr
+  ret void
+}
+
+; GCN-LABEL: @pos_vaddr_offset
+; DEFAULT-SCRATCH: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16
+; HUGE-SCRATCH: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
+define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
+entry:
+  %array = alloca [8192 x i32]
+  %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 4
+  store i32 0, i32* %ptr
+  %load_ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %offset
+  %val = load i32, i32* %load_ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
-- 
2.34.1
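
The rule the new guard enforces can be stated simply: fold the constant of an (add base, imm) address into MUBUF's immediate offset field only when the base is provably non-negative and the constant fits the 12-bit mubuf offset field; otherwise keep the add and let vaddr carry the full address. Below is a small standalone C++ sketch of that decision, outside of LLVM; the helper name, the boolean flag standing in for CurDAG->SignBitIsZero(N0), and the explicit 12-bit bound standing in for isLegalMUBUFImmOffset(C1) are illustrative assumptions, not part of the patch.

// Standalone illustration of the folding rule from the patch: split an
// (add base, imm) scratch address into vaddr = base plus an immediate
// offset only when base is known non-negative and imm fits the 12-bit
// mubuf offset field; otherwise the full address stays in vaddr.
#include <cassert>
#include <cstdint>

struct ScratchAddr {
  bool UseImmOffset;  // true: vaddr = base, imm goes in the offset field
  int64_t ImmOffset;  // value for the offset field when UseImmOffset is set
};

// baseKnownNonNegative stands in for CurDAG->SignBitIsZero(N0);
// the 12-bit bound stands in for isLegalMUBUFImmOffset(C1).
ScratchAddr selectScratchAddr(bool baseKnownNonNegative, int64_t imm) {
  const int64_t MaxImm = (1 << 12) - 1;
  if (baseKnownNonNegative && imm >= 0 && imm <= MaxImm)
    return {true, imm};  // fold, as in the pos_vaddr_offset test
  return {false, 0};     // keep the add, as in the neg_vaddr_offset test
}

int main() {
  assert(selectScratchAddr(true, 16).UseImmOffset);    // base provably >= 0
  assert(!selectScratchAddr(false, 16).UseImmOffset);  // sign unknown: no fold
  return 0;
}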