//===----------------------------------------------------------------------===//
def : Proc<"cedar", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>;
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
+ FeatureCFALUBug]>;
def : Proc<"redwood", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
+ FeatureCFALUBug]>;
def : Proc<"sumo", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureWavefrontSize64]>;
+ [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
def : Proc<"juniper", R600_VLIW5_Itin,
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
//===----------------------------------------------------------------------===//
def : Proc<"barts", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache]>;
+ [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
def : Proc<"turks", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache]>;
+ [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
def : Proc<"caicos", R600_VLIW5_Itin,
- [FeatureNorthernIslands]>;
+ [FeatureNorthernIslands, FeatureCFALUBug]>;
def : Proc<"cayman", R600_VLIW4_Itin,
[FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
return false;
}
+bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
+ if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
+ getLoopDepth() > 1)
+ return true;
+
+ if (!ST.hasCFAluBug())
+ return false;
+
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ case AMDGPU::CF_ALU_ELSE_AFTER:
+ case AMDGPU::CF_ALU_BREAK:
+ case AMDGPU::CF_ALU_CONTINUE:
+ if (CurrentSubEntries == 0)
+ return false;
+ if (ST.getWavefrontSize() == 64) {
+ // We are being conservative here. We only require this work-around if
+ // CurrentSubEntries > 3 &&
+ // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
+ //
+ // We have to be conservative, because we don't know for certain that
+ // our stack allocation algorithm for Evergreen/NI is correct. Applying this
+ // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
+ // resources without any problems.
+ return CurrentSubEntries > 3;
+ } else {
+ assert(ST.getWavefrontSize() == 32);
+ // We are being conservative here. We only require the work-around if
+ // CurrentSubEntries > 7 &&
+ // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
+ // See the comment on the wavefront size == 64 case for why we are
+ // being conservative.
+ return CurrentSubEntries > 7;
+ }
+ }
+}
+
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
switch(Item) {
default:
if (MI->getOpcode() == AMDGPU::CF_ALU)
LastAlu.back() = MI;
I++;
+ bool RequiresWorkAround =
+ CFStack.requiresWorkAroundForInst(MI->getOpcode());
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
- if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
+ if (RequiresWorkAround) {
+ DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
.addImm(CfCount + 1)
.addImm(1);
--- /dev/null
+; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG32 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
+
+; REQUIRES: asserts
+
+; We are currently allocating 2 extra sub-entries on Evergreen / NI for
+; non-WQM push instructions if we change this to 1, then we will need to
+; add one level of depth to each of these tests.
+
+; BUG64-NOT: Applying bug work-around
+; BUG32-NOT: Applying bug work-around
+; NOBUG-NOT: Applying bug work-around
+; FUNC-LABEL: @nested3
+define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+ %0 = icmp sgt i32 %cond, 0
+ br i1 %0, label %if.1, label %end
+
+if.1:
+ %1 = icmp sgt i32 %cond, 10
+ br i1 %1, label %if.2, label %if.store.1
+
+if.store.1:
+ store i32 1, i32 addrspace(1)* %out
+ br label %end
+
+if.2:
+ %2 = icmp sgt i32 %cond, 20
+ br i1 %2, label %if.3, label %if.2.store
+
+if.2.store:
+ store i32 2, i32 addrspace(1)* %out
+ br label %end
+
+if.3:
+ store i32 3, i32 addrspace(1)* %out
+ br label %end
+
+end:
+ ret void
+}
+
+; BUG64: Applying bug work-around
+; BUG32-NOT: Applying bug work-around
+; NOBUG-NOT: Applying bug work-around
+; FUNC-LABEL: @nested4
+define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+ %0 = icmp sgt i32 %cond, 0
+ br i1 %0, label %if.1, label %end
+
+if.1:
+ %1 = icmp sgt i32 %cond, 10
+ br i1 %1, label %if.2, label %if.1.store
+
+if.1.store:
+ store i32 1, i32 addrspace(1)* %out
+ br label %end
+
+if.2:
+ %2 = icmp sgt i32 %cond, 20
+ br i1 %2, label %if.3, label %if.2.store
+
+if.2.store:
+ store i32 2, i32 addrspace(1)* %out
+ br label %end
+
+if.3:
+ %3 = icmp sgt i32 %cond, 30
+ br i1 %3, label %if.4, label %if.3.store
+
+if.3.store:
+ store i32 3, i32 addrspace(1)* %out
+ br label %end
+
+if.4:
+ store i32 4, i32 addrspace(1)* %out
+ br label %end
+
+end:
+ ret void
+}
+
+; BUG64: Applying bug work-around
+; BUG32-NOT: Applying bug work-around
+; NOBUG-NOT: Applying bug work-around
+; FUNC-LABEL: @nested7
+define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+ %0 = icmp sgt i32 %cond, 0
+ br i1 %0, label %if.1, label %end
+
+if.1:
+ %1 = icmp sgt i32 %cond, 10
+ br i1 %1, label %if.2, label %if.1.store
+
+if.1.store:
+ store i32 1, i32 addrspace(1)* %out
+ br label %end
+
+if.2:
+ %2 = icmp sgt i32 %cond, 20
+ br i1 %2, label %if.3, label %if.2.store
+
+if.2.store:
+ store i32 2, i32 addrspace(1)* %out
+ br label %end
+
+if.3:
+ %3 = icmp sgt i32 %cond, 30
+ br i1 %3, label %if.4, label %if.3.store
+
+if.3.store:
+ store i32 3, i32 addrspace(1)* %out
+ br label %end
+
+if.4:
+ %4 = icmp sgt i32 %cond, 40
+ br i1 %4, label %if.5, label %if.4.store
+
+if.4.store:
+ store i32 4, i32 addrspace(1)* %out
+ br label %end
+
+if.5:
+ %5 = icmp sgt i32 %cond, 50
+ br i1 %5, label %if.6, label %if.5.store
+
+if.5.store:
+ store i32 5, i32 addrspace(1)* %out
+ br label %end
+
+if.6:
+ %6 = icmp sgt i32 %cond, 60
+ br i1 %6, label %if.7, label %if.6.store
+
+if.6.store:
+ store i32 6, i32 addrspace(1)* %out
+ br label %end
+
+if.7:
+ store i32 7, i32 addrspace(1)* %out
+ br label %end
+
+end:
+ ret void
+}
+
+; BUG64: Applying bug work-around
+; BUG32: Applying bug work-around
+; NOBUG-NOT: Applying bug work-around
+; FUNC-LABEL: @nested8
+define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+ %0 = icmp sgt i32 %cond, 0
+ br i1 %0, label %if.1, label %end
+
+if.1:
+ %1 = icmp sgt i32 %cond, 10
+ br i1 %1, label %if.2, label %if.1.store
+
+if.1.store:
+ store i32 1, i32 addrspace(1)* %out
+ br label %end
+
+if.2:
+ %2 = icmp sgt i32 %cond, 20
+ br i1 %2, label %if.3, label %if.2.store
+
+if.2.store:
+ store i32 2, i32 addrspace(1)* %out
+ br label %end
+
+if.3:
+ %3 = icmp sgt i32 %cond, 30
+ br i1 %3, label %if.4, label %if.3.store
+
+if.3.store:
+ store i32 3, i32 addrspace(1)* %out
+ br label %end
+
+if.4:
+ %4 = icmp sgt i32 %cond, 40
+ br i1 %4, label %if.5, label %if.4.store
+
+if.4.store:
+ store i32 4, i32 addrspace(1)* %out
+ br label %end
+
+if.5:
+ %5 = icmp sgt i32 %cond, 50
+ br i1 %5, label %if.6, label %if.5.store
+
+if.5.store:
+ store i32 5, i32 addrspace(1)* %out
+ br label %end
+
+if.6:
+ %6 = icmp sgt i32 %cond, 60
+ br i1 %6, label %if.7, label %if.6.store
+
+if.6.store:
+ store i32 6, i32 addrspace(1)* %out
+ br label %end
+
+if.7:
+ %7 = icmp sgt i32 %cond, 70
+ br i1 %7, label %if.8, label %if.7.store
+
+if.7.store:
+ store i32 7, i32 addrspace(1)* %out
+ br label %end
+
+if.8:
+ store i32 8, i32 addrspace(1)* %out
+ br label %end
+
+end:
+ ret void
+}