return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}
+bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
+ // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+ // additionally can do r + r + i with addr64. 32-bit has more addressing
+ // mode options. Depending on the resource constant, it can also do
+ // (i64 r0) + (i32 r1) * (i14 i).
+ //
+ // Private arrays end up using a scratch buffer most of the time, so also
+ // assume those use MUBUF instructions. Scratch loads / stores are currently
+ // implemented as mubuf instructions with offen bit set, so slightly
+ // different than the normal addr64.
+ if (!isUInt<12>(AM.BaseOffs))
+ return false;
+
+ // FIXME: Since we can split immediate into soffset and immediate offset,
+ // would it make sense to allow any immediate?
+
+ switch (AM.Scale) {
+ case 0: // r + i or just i, depending on HasBaseReg.
+ return true;
+ case 1:
+ return true; // We have r + r or r + i.
+ case 2:
+ if (AM.HasBaseReg) {
+ // Reject 2 * r + r.
+ return false;
+ }
+
+ // Allow 2 * r as r + r
+ // Or 2 * r + i is allowed as r + r + i.
+ return true;
+ default: // Don't allow n * r
+ return false;
+ }
+}
+
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
return false;
switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::GLOBAL_ADDRESS: {
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Assume the we will use FLAT for all global memory accesses
// on VI.
// because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
- // fall-through
- case AMDGPUAS::PRIVATE_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
- // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
- // additionally can do r + r + i with addr64. 32-bit has more addressing
- // mode options. Depending on the resource constant, it can also do
- // (i64 r0) + (i32 r1) * (i14 i).
- //
- // SMRD instructions have an 8-bit, dword offset.
- //
- // Assume nonunifom access, since the address space isn't enough to know
- // what instruction we will use, and since we don't know if this is a load
- // or store and scalar stores are only available on VI.
- //
- // We also know if we are doing an extload, we can't do a scalar load.
- //
- // Private arrays end up using a scratch buffer most of the time, so also
- // assume those use MUBUF instructions. Scratch loads / stores are currently
- // implemented as mubuf instructions with offen bit set, so slightly
- // different than the normal addr64.
- if (!isUInt<12>(AM.BaseOffs))
- return false;
- // FIXME: Since we can split immediate into soffset and immediate offset,
- // would it make sense to allow any immediate?
+ return isLegalMUBUFAddressingMode(AM);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // If the offset isn't a multiple of 4, it probably isn't going to be
+ // correctly aligned.
+ if (AM.BaseOffs % 4 != 0)
+ return isLegalMUBUFAddressingMode(AM);
+
+ // There are no SMRD extloads, so if we have to do a small type access we
+ // will use a MUBUF load.
+ // FIXME?: We also need to do this if unaligned, but we don't know the
+ // alignment here.
+ if (DL.getTypeStoreSize(Ty) < 4)
+ return isLegalMUBUFAddressingMode(AM);
+
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ // SMRD instructions have an 8-bit, dword offset on SI.
+ if (!isUInt<8>(AM.BaseOffs / 4))
+ return false;
+ } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
+ // On CI+, this can also be a 32-bit literal constant offset. If it fits
+ // in 8-bits, it can use a smaller encoding.
+ if (!isUInt<32>(AM.BaseOffs / 4))
+ return false;
+ } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // On VI, these use the SMEM format and the offset is 20-bit in bytes.
+ if (!isUInt<20>(AM.BaseOffs))
+ return false;
+ } else
+ llvm_unreachable("unhandled generation");
- switch (AM.Scale) {
- case 0: // r + i or just i, depending on HasBaseReg.
+ if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
- case 1:
- return true; // We have r + r or r + i.
- case 2:
- if (AM.HasBaseReg) {
- // Reject 2 * r + r.
- return false;
- }
- // Allow 2 * r as r + r
- // Or 2 * r + i is allowed as r + r + i.
+ if (AM.Scale == 1 && AM.HasBaseReg)
return true;
- default: // Don't allow n * r
- return false;
- }
+
+ return false;
}
+
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
+ return isLegalMUBUFAddressingMode(AM);
+
case AMDGPUAS::LOCAL_ADDRESS:
case AMDGPUAS::REGION_ADDRESS: {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
ret void
}
-; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %in
-; OPT: br i1
-; OPT-NOT: ptrtoint
-
-; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
-; GCN: flat_load_dword
-; GCN: {{^}}BB4_2:
-
-define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
-entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
- %tmp0 = icmp eq i32 %cond, 0
- br i1 %tmp0, label %endif, label %if
-
-if:
- %tmp1 = load i32, i32 addrspace(4)* %in.gep
- br label %endif
-
-endif:
- %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
- br label %done
-
-done:
- ret void
-}
-
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
-; GCN: {{^}}BB5_2:
+; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
-; GCN: {{^}}BB6_2:
+; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-; GCN: {{^}}BB7_2:
+; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
entry:
%offset.ext = zext i32 %offset to i64
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
+
+
+
+; OPT-LABEL: @test_sink_constant_small_offset_i32
+; OPT-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
+; OPT-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
+; OPT-SI: getelementptr i32, i32 addrspace(2)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
+
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
+; OPT-SI: getelementptr i32, i32 addrspace(2)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
+; GCN: s_and_saveexec_b64
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 3{{$}}
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -4{{$}}
+; GCN: s_add_u32
+; GCN: s_addc_u32
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
+; OPT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
+; GCN: s_and_saveexec_b64
+; GCN: s_add_u32
+; GCN: s_addc_u32
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+
+; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
+; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
+
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
+; OPT-SI: getelementptr i32, i32 addrspace(2)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-VI: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+
+; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
+
+; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
+; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}