1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
2 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
; Test case: stores the result of llvm.r600.read.ngroups.x to %out.
; redwood run: the register named in the MEM_RAT_CACHELESS STORE_RAW line must
; also appear as the first operand of a MOV whose other operand is KC0[0].X.
; SI run: an S_LOAD_DWORD from s[0:1] at offset 0 is expected; its result is
; copied to a v register by V_MOV_B32_e32, and that v register is the one
; named in the BUFFER_STORE_DWORD line.
4 ; R600-CHECK: @ngroups_x
5 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
6 ; R600-CHECK: MOV [[VAL]], KC0[0].X
8 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0
9 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
10 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
11 define void @ngroups_x (i32 addrspace(1)* %out) {
13 %0 = call i32 @llvm.r600.read.ngroups.x() #0
14 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.ngroups.y to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[0].Y.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x1, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
18 ; R600-CHECK: @ngroups_y
19 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
20 ; R600-CHECK: MOV [[VAL]], KC0[0].Y
21 ; SI-CHECK: @ngroups_y
22 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x1
23 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
24 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
25 define void @ngroups_y (i32 addrspace(1)* %out) {
27 %0 = call i32 @llvm.r600.read.ngroups.y() #0
28 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.ngroups.z to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[0].Z.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x2, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
32 ; R600-CHECK: @ngroups_z
33 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
34 ; R600-CHECK: MOV [[VAL]], KC0[0].Z
35 ; SI-CHECK: @ngroups_z
36 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x2
37 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
38 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
39 define void @ngroups_z (i32 addrspace(1)* %out) {
41 %0 = call i32 @llvm.r600.read.ngroups.z() #0
42 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.global.size.x to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[0].W.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x3, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
46 ; R600-CHECK: @global_size_x
47 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
48 ; R600-CHECK: MOV [[VAL]], KC0[0].W
49 ; SI-CHECK: @global_size_x
50 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x3
51 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
52 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
53 define void @global_size_x (i32 addrspace(1)* %out) {
55 %0 = call i32 @llvm.r600.read.global.size.x() #0
56 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.global.size.y to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[1].X.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x4, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
60 ; R600-CHECK: @global_size_y
61 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
62 ; R600-CHECK: MOV [[VAL]], KC0[1].X
63 ; SI-CHECK: @global_size_y
64 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x4
65 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
66 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
67 define void @global_size_y (i32 addrspace(1)* %out) {
69 %0 = call i32 @llvm.r600.read.global.size.y() #0
70 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.global.size.z to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[1].Y.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x5, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
74 ; R600-CHECK: @global_size_z
75 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
76 ; R600-CHECK: MOV [[VAL]], KC0[1].Y
77 ; SI-CHECK: @global_size_z
78 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x5
79 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
80 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
81 define void @global_size_z (i32 addrspace(1)* %out) {
83 %0 = call i32 @llvm.r600.read.global.size.z() #0
84 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.local.size.x to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[1].Z.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x6, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
88 ; R600-CHECK: @local_size_x
89 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
90 ; R600-CHECK: MOV [[VAL]], KC0[1].Z
91 ; SI-CHECK: @local_size_x
92 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x6
93 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
94 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
95 define void @local_size_x (i32 addrspace(1)* %out) {
97 %0 = call i32 @llvm.r600.read.local.size.x() #0
98 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.local.size.y to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[1].W.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x7, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
102 ; R600-CHECK: @local_size_y
103 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
104 ; R600-CHECK: MOV [[VAL]], KC0[1].W
105 ; SI-CHECK: @local_size_y
106 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x7
107 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
108 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
109 define void @local_size_y (i32 addrspace(1)* %out) {
111 %0 = call i32 @llvm.r600.read.local.size.y() #0
112 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.local.size.z to %out.
; redwood run: the stored register must be matched by a MOV whose other
; operand is KC0[2].X.
; SI run: the value is expected to come from S_LOAD_DWORD on s[0:1] at offset
; 0x8, be copied to a v register, and then be stored by BUFFER_STORE_DWORD.
116 ; R600-CHECK: @local_size_z
117 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
118 ; R600-CHECK: MOV [[VAL]], KC0[2].X
119 ; SI-CHECK: @local_size_z
120 ; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x8
121 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
122 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
123 define void @local_size_z (i32 addrspace(1)* %out) {
125 %0 = call i32 @llvm.r600.read.local.size.z() #0
126 store i32 %0, i32 addrspace(1)* %out
130 ; The tgid values are stored in SGPRs offset by the number of user SGPRs.
131 ; Currently we always use exactly 2 user SGPRs for the pointer to the
132 ; kernel arguments, but this may change in the future.
; Test case: stores the result of llvm.r600.read.tgid.x to %out.
; SI run: the v register named in the BUFFER_STORE_DWORD line must first be
; matched by a V_MOV_B32_e32 whose second operand is s2.
135 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s2
136 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
137 define void @tgid_x (i32 addrspace(1)* %out) {
139 %0 = call i32 @llvm.r600.read.tgid.x() #0
140 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.tgid.y to %out.
; SI run: the v register named in the BUFFER_STORE_DWORD line must first be
; matched by a V_MOV_B32_e32 whose second operand is s3.
145 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s3
146 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
147 define void @tgid_y (i32 addrspace(1)* %out) {
149 %0 = call i32 @llvm.r600.read.tgid.y() #0
150 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.tgid.z to %out.
; SI run: the v register named in the BUFFER_STORE_DWORD line must first be
; matched by a V_MOV_B32_e32 whose second operand is s4.
155 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4
156 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
157 define void @tgid_z (i32 addrspace(1)* %out) {
159 %0 = call i32 @llvm.r600.read.tgid.z() #0
160 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.tidig.x to %out.
; SI run: the BUFFER_STORE_DWORD line is expected to name v0 literally, with
; no intermediate copy being checked.
165 ; SI-CHECK: BUFFER_STORE_DWORD v0
166 define void @tidig_x (i32 addrspace(1)* %out) {
168 %0 = call i32 @llvm.r600.read.tidig.x() #0
169 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.tidig.y to %out.
; SI run: the BUFFER_STORE_DWORD line is expected to name v1 literally, with
; no intermediate copy being checked.
174 ; SI-CHECK: BUFFER_STORE_DWORD v1
175 define void @tidig_y (i32 addrspace(1)* %out) {
177 %0 = call i32 @llvm.r600.read.tidig.y() #0
178 store i32 %0, i32 addrspace(1)* %out
; Test case: stores the result of llvm.r600.read.tidig.z to %out.
; SI run: the BUFFER_STORE_DWORD line is expected to name v2 literally, with
; no intermediate copy being checked.
183 ; SI-CHECK: BUFFER_STORE_DWORD v2
184 define void @tidig_z (i32 addrspace(1)* %out) {
186 %0 = call i32 @llvm.r600.read.tidig.z() #0
187 store i32 %0, i32 addrspace(1)* %out
; Declarations of the llvm.r600.read.* intrinsics called by the test functions
; in this file. Each takes no arguments, returns i32, and refers to attribute
; group #0, which is defined on the last line as { readnone }.
191 declare i32 @llvm.r600.read.ngroups.x() #0
192 declare i32 @llvm.r600.read.ngroups.y() #0
193 declare i32 @llvm.r600.read.ngroups.z() #0
195 declare i32 @llvm.r600.read.global.size.x() #0
196 declare i32 @llvm.r600.read.global.size.y() #0
197 declare i32 @llvm.r600.read.global.size.z() #0
199 declare i32 @llvm.r600.read.local.size.x() #0
200 declare i32 @llvm.r600.read.local.size.y() #0
201 declare i32 @llvm.r600.read.local.size.z() #0
203 declare i32 @llvm.r600.read.tgid.x() #0
204 declare i32 @llvm.r600.read.tgid.y() #0
205 declare i32 @llvm.r600.read.tgid.z() #0
207 declare i32 @llvm.r600.read.tidig.x() #0
208 declare i32 @llvm.r600.read.tidig.y() #0
209 declare i32 @llvm.r600.read.tidig.z() #0
211 attributes #0 = { readnone }