From 891e9e7869c9f77e2a3de33a4033f71f20f5d788 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 18 Nov 2014 20:39:39 +0000 Subject: [PATCH] R600/SI: Make sure resource descriptors are always stored in SGPRs git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222253 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 4 +-- test/CodeGen/R600/sgpr-copy.ll | 51 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 7c557af1d51..8d4164a1c39 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -53,10 +53,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); + addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); - addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass); + addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass); addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass); computeRegisterProperties(); diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index b5a72f76b05..8daf753f16d 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -325,3 +325,54 @@ ENDIF69: attributes #0 = { "ShaderType"="0" } +; This test checks that image_sample resource descriptors aren't loaded into +; vgprs. The verifier will fail if this happens. +; CHECK-LABEL:{{^}}sample_rsrc: +; CHECK: image_sample +; CHECK: image_sample +; CHECK: s_endpgm +define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { +bb: + %tmp = getelementptr [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0 + %tmp22 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0 + %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16) + %tmp25 = getelementptr [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0 + %tmp26 = load <8 x i32> addrspace(2)* %tmp25, !tbaa !0 + %tmp27 = getelementptr [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0 + %tmp28 = load <4 x i32> addrspace(2)* %tmp27, !tbaa !0 + %tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7) + %tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7) + %tmp31 = bitcast float %tmp23 to i32 + %tmp36 = icmp ne i32 %tmp31, 0 + br i1 %tmp36, label %bb38, label %bb80 + +bb38: ; preds = %bb + %tmp52 = bitcast float %tmp29 to i32 + %tmp53 = bitcast float %tmp30 to i32 + %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0 + %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1 + %tmp56 = bitcast <8 x i32> %tmp26 to <32 x i8> + %tmp57 = bitcast <4 x i32> %tmp28 to <16 x i8> + %tmp58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp55, <32 x i8> %tmp56, <16 x i8> %tmp57, i32 2) + br label %bb71 + +bb80: ; preds = %bb + %tmp81 = bitcast float %tmp29 to i32 + %tmp82 = bitcast float %tmp30 to i32 + %tmp82.2 = add i32 %tmp82, 1 + %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0 + %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1 + %tmp85 = bitcast <8 x i32> %tmp26 to <32 x i8> + %tmp86 = bitcast <4 x i32> %tmp28 to <16 x i8> + %tmp87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp84, <32 x i8> %tmp85, <16 x i8> %tmp86, i32 2) + br label %bb71 + +bb71: ; preds = %bb80, %bb38 + %tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ] + %tmp88 = extractelement <4 x float> %tmp72, i32 0 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88) + ret void +} + +attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } +attributes #1 = { nounwind readnone } -- 2.34.1