1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; Test for llvm.r600.read.ngroups.x: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[0].X.
;   amdgcn run: s_load_dword from s[0:1] at offset 0, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].X

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.ngroups.y: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[0].Y.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x1, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}ngroups_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Y

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.ngroups.z: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[0].Z.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x2, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}ngroups_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Z

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.global.size.x: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[0].W.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x3, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}global_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].W

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.global.size.y: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[1].X.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x4, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}global_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].X

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.global.size.z: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[1].Y.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x5, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}global_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Y

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.local.size.x: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[1].Z.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x6, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}local_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Z

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @local_size_x (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.local.size.y: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[1].W.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x7, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}local_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].W

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @local_size_y (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.local.size.z: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[2].X.
;   amdgcn run: s_load_dword from s[0:1] at offset 0x8, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}local_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].X

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @local_size_z (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.AMDGPU.read.workdim: the intrinsic result is stored to %out.
;   r600 run:   the stored T register must be written by a MOV from KC0[2].Z.
;   amdgcn run: s_load_dword from s[0:1] at offset 0xb, copied to a v register
;               and written with buffer_store_dword.
; FUNC-LABEL: {{^}}get_work_dim:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].Z

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @get_work_dim (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.AMDGPU.read.workdim() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
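; The tgid values are stored in SGPRs, offset by the number of user SGPRs.
; Currently we always use exactly 2 user SGPRs for the pointer to the
; kernel arguments, but this may change in the future.
; NOTE(review): the tgid checks below expect s4, s5 and s6, i.e. an offset of
; 4 rather than 2 -- confirm which SGPRs precede the tgid values.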
; Test for llvm.r600.read.tgid.x: the intrinsic result is stored to %out.
;   amdgcn run: the value is expected in s4, copied to a v register with
;               v_mov_b32 and written with buffer_store_dword.
;   r600 run:   only the function label is checked.
; FUNC-LABEL: {{^}}tgid_x:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
; SI: buffer_store_dword [[VVAL]]
define void @tgid_x (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.tgid.y: the intrinsic result is stored to %out.
;   amdgcn run: the value is expected in s5, copied to a v register with
;               v_mov_b32 and written with buffer_store_dword.
;   r600 run:   only the function label is checked.
; FUNC-LABEL: {{^}}tgid_y:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
; SI: buffer_store_dword [[VVAL]]
define void @tgid_y (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.tgid.z: the intrinsic result is stored to %out.
;   amdgcn run: the value is expected in s6, copied to a v register with
;               v_mov_b32 and written with buffer_store_dword.
;   r600 run:   only the function label is checked.
; FUNC-LABEL: {{^}}tgid_z:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
; SI: buffer_store_dword [[VVAL]]
define void @tgid_z (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.tidig.x: the intrinsic result is stored to %out.
;   amdgcn run: buffer_store_dword is expected to store v0 directly.
;   r600 run:   only the function label is checked.
; FUNC-LABEL: {{^}}tidig_x:
; SI: buffer_store_dword v0
define void @tidig_x (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.tidig.y: the intrinsic result is stored to %out.
;   amdgcn run: buffer_store_dword is expected to store v1 directly.
;   r600 run:   only the function label is checked.
; FUNC-LABEL: {{^}}tidig_y:
; SI: buffer_store_dword v1
define void @tidig_y (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Test for llvm.r600.read.tidig.z: the intrinsic result is stored to %out.
;   amdgcn run: buffer_store_dword is expected to store v2 directly.
;   r600 run:   only the function label is checked.
; FUNC-LABEL: {{^}}tidig_z:
; SI: buffer_store_dword v2
define void @tidig_z (i32 addrspace(1)* %out) {
entry:
  ; The explicit label matters: an unlabeled entry block would itself take
  ; the number %0, making "%0 = call" a numbering error.
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Declarations of the intrinsics called by the test functions in this file.
; Each takes no arguments, returns i32, and uses attribute group #0.

; Number of work-groups per dimension.
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

; Global size per dimension.
declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

; Local size per dimension.
declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

; tgid values per dimension.
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

; tidig values per dimension.
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

; Work dimension query.
declare i32 @llvm.AMDGPU.read.workdim() #0

; Attribute group shared by every declaration and call site above.
attributes #0 = { readnone }