1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
2 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
3 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
5 ;===------------------------------------------------------------------------===;
7 ;===------------------------------------------------------------------------===;
9 ; Load an i8 value from the global address space.
10 ; FUNC-LABEL: @load_i8
11 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
13 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
; Loads one i8 from %in (addrspace 1), zero-extends it to i32 and stores the
; result to %out, so the checks above look for an unsigned byte load.
14 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
15 %1 = load i8 addrspace(1)* %in
16 %2 = zext i8 %1 to i32
17 store i32 %2, i32 addrspace(1)* %out
21 ; FUNC-LABEL: @load_i8_sext
22 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
23 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
25 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
27 ; SI-CHECK: BUFFER_LOAD_SBYTE
; Loads one i8 from %in (addrspace 1), sign-extends it to i32 and stores the
; result to %out. The R600 checks above expect the extension as an LSHL/ASHR
; pair after the byte read; the SI check expects a signed byte load.
28 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
30 %0 = load i8 addrspace(1)* %in
31 %1 = sext i8 %0 to i32
32 store i32 %1, i32 addrspace(1)* %out
36 ; FUNC-LABEL: @load_v2i8
37 ; R600-CHECK: VTX_READ_8
38 ; R600-CHECK: VTX_READ_8
39 ; SI-CHECK: BUFFER_LOAD_UBYTE
40 ; SI-CHECK: BUFFER_LOAD_UBYTE
; Loads a <2 x i8> from %in (addrspace 1), zero-extends each lane to i32 and
; stores the <2 x i32> to %out. The checks above expect one byte load per lane.
41 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
43 %0 = load <2 x i8> addrspace(1)* %in
44 %1 = zext <2 x i8> %0 to <2 x i32>
45 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
49 ; FUNC-LABEL: @load_v2i8_sext
50 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
51 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
52 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
54 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
56 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
58 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
60 ; SI-CHECK: BUFFER_LOAD_SBYTE
61 ; SI-CHECK: BUFFER_LOAD_SBYTE
; Loads a <2 x i8> from %in (addrspace 1), sign-extends each lane to i32 and
; stores the <2 x i32> to %out. The R600 checks above use the -DAG form, so the
; per-lane read/LSHL/ASHR lines may match in any order.
62 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
64 %0 = load <2 x i8> addrspace(1)* %in
65 %1 = sext <2 x i8> %0 to <2 x i32>
66 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
70 ; FUNC-LABEL: @load_v4i8
71 ; R600-CHECK: VTX_READ_8
72 ; R600-CHECK: VTX_READ_8
73 ; R600-CHECK: VTX_READ_8
74 ; R600-CHECK: VTX_READ_8
75 ; SI-CHECK: BUFFER_LOAD_UBYTE
76 ; SI-CHECK: BUFFER_LOAD_UBYTE
77 ; SI-CHECK: BUFFER_LOAD_UBYTE
78 ; SI-CHECK: BUFFER_LOAD_UBYTE
; Loads a <4 x i8> from %in (addrspace 1), zero-extends each lane to i32 and
; stores the <4 x i32> to %out. The checks above expect four byte loads.
79 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
81 %0 = load <4 x i8> addrspace(1)* %in
82 %1 = zext <4 x i8> %0 to <4 x i32>
83 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
87 ; FUNC-LABEL: @load_v4i8_sext
88 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
89 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
90 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
91 ; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
92 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
94 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
96 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
98 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
100 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
102 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
104 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
106 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
108 ; SI-CHECK: BUFFER_LOAD_SBYTE
109 ; SI-CHECK: BUFFER_LOAD_SBYTE
110 ; SI-CHECK: BUFFER_LOAD_SBYTE
111 ; SI-CHECK: BUFFER_LOAD_SBYTE
; Loads a <4 x i8> from %in (addrspace 1), sign-extends each lane to i32 and
; stores the <4 x i32> to %out. The checks above expect four byte reads, each
; followed on R600 by an LSHL/ASHR pair, and four signed byte loads on SI.
112 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
114 %0 = load <4 x i8> addrspace(1)* %in
115 %1 = sext <4 x i8> %0 to <4 x i32>
116 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
120 ; Load an i16 value from the global address space.
121 ; FUNC-LABEL: @load_i16
122 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
123 ; SI-CHECK: BUFFER_LOAD_USHORT
; Loads one i16 from %in (addrspace 1), zero-extends it to i32 and stores the
; result to %out, so the checks above look for an unsigned 16-bit load.
124 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
126 %0 = load i16 addrspace(1)* %in
127 %1 = zext i16 %0 to i32
128 store i32 %1, i32 addrspace(1)* %out
132 ; FUNC-LABEL: @load_i16_sext
133 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
134 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
136 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
138 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Loads one i16 from %in (addrspace 1), sign-extends it to i32 and stores the
; result to %out. The R600 checks above expect an LSHL/ASHR pair after the
; 16-bit read; the SI check expects a signed short load.
139 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
141 %0 = load i16 addrspace(1)* %in
142 %1 = sext i16 %0 to i32
143 store i32 %1, i32 addrspace(1)* %out
147 ; FUNC-LABEL: @load_v2i16
148 ; R600-CHECK: VTX_READ_16
149 ; R600-CHECK: VTX_READ_16
150 ; SI-CHECK: BUFFER_LOAD_USHORT
151 ; SI-CHECK: BUFFER_LOAD_USHORT
; Loads a <2 x i16> from %in (addrspace 1), zero-extends each lane to i32 and
; stores the <2 x i32> to %out. The checks above expect two 16-bit loads.
152 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
154 %0 = load <2 x i16> addrspace(1)* %in
155 %1 = zext <2 x i16> %0 to <2 x i32>
156 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
160 ; FUNC-LABEL: @load_v2i16_sext
161 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
162 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
163 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
165 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
167 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
169 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
171 ; SI-CHECK: BUFFER_LOAD_SSHORT
172 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Loads a <2 x i16> from %in (addrspace 1), sign-extends each lane to i32 and
; stores the <2 x i32> to %out. The R600 checks above are order-independent
; (-DAG) and pair each 16-bit read with an LSHL/ASHR sequence.
173 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
175 %0 = load <2 x i16> addrspace(1)* %in
176 %1 = sext <2 x i16> %0 to <2 x i32>
177 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
181 ; FUNC-LABEL: @load_v4i16
182 ; R600-CHECK: VTX_READ_16
183 ; R600-CHECK: VTX_READ_16
184 ; R600-CHECK: VTX_READ_16
185 ; R600-CHECK: VTX_READ_16
186 ; SI-CHECK: BUFFER_LOAD_USHORT
187 ; SI-CHECK: BUFFER_LOAD_USHORT
188 ; SI-CHECK: BUFFER_LOAD_USHORT
189 ; SI-CHECK: BUFFER_LOAD_USHORT
; Loads a <4 x i16> from %in (addrspace 1), zero-extends each lane to i32 and
; stores the <4 x i32> to %out. The checks above expect four 16-bit loads.
190 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
192 %0 = load <4 x i16> addrspace(1)* %in
193 %1 = zext <4 x i16> %0 to <4 x i32>
194 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
198 ; FUNC-LABEL: @load_v4i16_sext
199 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
200 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
201 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
202 ; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
203 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
205 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
207 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
209 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
211 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
213 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
215 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
217 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
219 ; SI-CHECK: BUFFER_LOAD_SSHORT
220 ; SI-CHECK: BUFFER_LOAD_SSHORT
221 ; SI-CHECK: BUFFER_LOAD_SSHORT
222 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Loads a <4 x i16> from %in (addrspace 1), sign-extends each lane to i32 and
; stores the <4 x i32> to %out. The checks above expect four 16-bit reads with
; an LSHL/ASHR pair per lane on R600, and four signed short loads on SI.
223 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
225 %0 = load <4 x i16> addrspace(1)* %in
226 %1 = sext <4 x i16> %0 to <4 x i32>
227 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
231 ; Load an i32 value from the global address space.
232 ; FUNC-LABEL: @load_i32
233 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
235 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
; Copies one i32 from %in to %out (both addrspace 1) with no extension; the
; checks above expect a single 32-bit load.
236 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
238 %0 = load i32 addrspace(1)* %in
239 store i32 %0, i32 addrspace(1)* %out
243 ; Load a f32 value from the global address space.
244 ; FUNC-LABEL: @load_f32
245 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
247 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
; Copies one float from %in to %out (both addrspace 1); the checks above expect
; the same 32-bit load patterns as the i32 case.
248 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
250 %0 = load float addrspace(1)* %in
251 store float %0, float addrspace(1)* %out
255 ; Load a v2f32 value from the global address space.
256 ; FUNC-LABEL: @load_v2f32
257 ; R600-CHECK: VTX_READ_64
259 ; SI-CHECK: BUFFER_LOAD_DWORDX2
; Copies a <2 x float> from %in to %out (both addrspace 1); the checks above
; expect it to be fetched as one 64-bit load (VTX_READ_64 / DWORDX2).
260 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
262 %0 = load <2 x float> addrspace(1)* %in
263 store <2 x float> %0, <2 x float> addrspace(1)* %out
267 ; FUNC-LABEL: @load_i64
268 ; R600-CHECK: MEM_RAT
269 ; R600-CHECK: MEM_RAT
271 ; SI-CHECK: BUFFER_LOAD_DWORDX2
; Copies one i64 from %in to %out (both addrspace 1). The R600 checks above
; look for two MEM_RAT lines; the SI check expects a DWORDX2 load.
272 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
274 %0 = load i64 addrspace(1)* %in
275 store i64 %0, i64 addrspace(1)* %out
279 ; FUNC-LABEL: @load_i64_sext
280 ; R600-CHECK: MEM_RAT
281 ; R600-CHECK: MEM_RAT
282 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
284 ; SI-CHECK: BUFFER_LOAD_DWORD
; Despite the name, the load itself is i32: the value read from %in is
; sign-extended to i64 and the i64 is stored to %out. The R600 checks above
; expect an ASHR with a literal operand for the extension.
286 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
288 %0 = load i32 addrspace(1)* %in
289 %1 = sext i32 %0 to i64
290 store i64 %1, i64 addrspace(1)* %out
294 ; FUNC-LABEL: @load_i64_zext
295 ; R600-CHECK: MEM_RAT
296 ; R600-CHECK: MEM_RAT
; Loads an i32 from %in, zero-extends it to i64 and stores the i64 to %out.
; Only R600 check lines are visible above for this function; no SI
; expectations are shown.
297 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
299 %0 = load i32 addrspace(1)* %in
300 %1 = zext i32 %0 to i64
301 store i64 %1, i64 addrspace(1)* %out
305 ; FUNC-LABEL: @load_v8i32
306 ; R600-CHECK: VTX_READ_128
307 ; R600-CHECK: VTX_READ_128
308 ; XXX: We should be using DWORDX4 instructions on SI.
309 ; SI-CHECK: BUFFER_LOAD_DWORD
310 ; SI-CHECK: BUFFER_LOAD_DWORD
311 ; SI-CHECK: BUFFER_LOAD_DWORD
312 ; SI-CHECK: BUFFER_LOAD_DWORD
313 ; SI-CHECK: BUFFER_LOAD_DWORD
314 ; SI-CHECK: BUFFER_LOAD_DWORD
315 ; SI-CHECK: BUFFER_LOAD_DWORD
316 ; SI-CHECK: BUFFER_LOAD_DWORD
; Copies an <8 x i32> from %in to %out (both addrspace 1). The checks above
; expect two 128-bit reads on R600 and eight BUFFER_LOAD_DWORD matches on SI.
317 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
319 %0 = load <8 x i32> addrspace(1)* %in
320 store <8 x i32> %0, <8 x i32> addrspace(1)* %out
324 ; FUNC-LABEL: @load_v16i32
325 ; R600-CHECK: VTX_READ_128
326 ; R600-CHECK: VTX_READ_128
327 ; R600-CHECK: VTX_READ_128
328 ; R600-CHECK: VTX_READ_128
329 ; XXX: We should be using DWORDX4 instructions on SI.
330 ; SI-CHECK: BUFFER_LOAD_DWORD
331 ; SI-CHECK: BUFFER_LOAD_DWORD
332 ; SI-CHECK: BUFFER_LOAD_DWORD
333 ; SI-CHECK: BUFFER_LOAD_DWORD
334 ; SI-CHECK: BUFFER_LOAD_DWORD
335 ; SI-CHECK: BUFFER_LOAD_DWORD
336 ; SI-CHECK: BUFFER_LOAD_DWORD
337 ; SI-CHECK: BUFFER_LOAD_DWORD
338 ; SI-CHECK: BUFFER_LOAD_DWORD
339 ; SI-CHECK: BUFFER_LOAD_DWORD
340 ; SI-CHECK: BUFFER_LOAD_DWORD
341 ; SI-CHECK: BUFFER_LOAD_DWORD
342 ; SI-CHECK: BUFFER_LOAD_DWORD
343 ; SI-CHECK: BUFFER_LOAD_DWORD
344 ; SI-CHECK: BUFFER_LOAD_DWORD
345 ; SI-CHECK: BUFFER_LOAD_DWORD
; Copies a <16 x i32> from %in to %out (both addrspace 1). The checks above
; expect four 128-bit reads on R600 and sixteen BUFFER_LOAD_DWORD matches on SI.
346 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
348 %0 = load <16 x i32> addrspace(1)* %in
349 store <16 x i32> %0, <16 x i32> addrspace(1)* %out
353 ;===------------------------------------------------------------------------===;
354 ; CONSTANT ADDRESS SPACE
355 ;===------------------------------------------------------------------------===;
357 ; Load a sign-extended i8 value
358 ; FUNC-LABEL: @load_const_i8_sext
359 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
360 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
362 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
364 ; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
; Loads one i8 from the constant address space (addrspace 2), sign-extends it
; to i32 and stores the result to %out in addrspace 1.
365 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
367 %0 = load i8 addrspace(2)* %in
368 %1 = sext i8 %0 to i32
369 store i32 %1, i32 addrspace(1)* %out
373 ; Load an aligned i8 value
374 ; FUNC-LABEL: @load_const_i8_aligned
375 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
376 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
; Loads one i8 directly from %in (addrspace 2, no pointer offset applied),
; zero-extends it to i32 and stores the result to %out in addrspace 1.
377 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
379 %0 = load i8 addrspace(2)* %in
380 %1 = zext i8 %0 to i32
381 store i32 %1, i32 addrspace(1)* %out
385 ; Load an un-aligned i8 value
386 ; FUNC-LABEL: @load_const_i8_unaligned
387 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
388 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
; Same as the aligned i8 case, except the address is first advanced by one i8
; element with getelementptr so the load does not start at %in itself.
389 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
391 %0 = getelementptr i8 addrspace(2)* %in, i32 1
392 %1 = load i8 addrspace(2)* %0
393 %2 = zext i8 %1 to i32
394 store i32 %2, i32 addrspace(1)* %out
398 ; Load a sign-extended i16 value
399 ; FUNC-LABEL: @load_const_i16_sext
400 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
401 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
403 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
405 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Loads one i16 from the constant address space (addrspace 2), sign-extends it
; to i32 and stores the result to %out in addrspace 1.
406 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
408 %0 = load i16 addrspace(2)* %in
409 %1 = sext i16 %0 to i32
410 store i32 %1, i32 addrspace(1)* %out
414 ; Load an aligned i16 value
415 ; FUNC-LABEL: @load_const_i16_aligned
416 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
417 ; SI-CHECK: BUFFER_LOAD_USHORT
; Loads one i16 directly from %in (addrspace 2, no pointer offset applied),
; zero-extends it to i32 and stores the result to %out in addrspace 1.
418 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
420 %0 = load i16 addrspace(2)* %in
421 %1 = zext i16 %0 to i32
422 store i32 %1, i32 addrspace(1)* %out
426 ; Load an un-aligned i16 value
427 ; FUNC-LABEL: @load_const_i16_unaligned
428 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
429 ; SI-CHECK: BUFFER_LOAD_USHORT
; Same as the aligned i16 case, except the address is first advanced by one
; i16 element with getelementptr before the load.
430 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
432 %0 = getelementptr i16 addrspace(2)* %in, i32 1
433 %1 = load i16 addrspace(2)* %0
434 %2 = zext i16 %1 to i32
435 store i32 %2, i32 addrspace(1)* %out
439 ; Load an i32 value from the constant address space.
440 ; FUNC-LABEL: @load_const_addrspace_i32
441 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
443 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
; Copies one i32 from the constant address space (addrspace 2) to %out in
; addrspace 1. The SI check above expects a scalar load (S_LOAD_DWORD) here
; rather than the buffer loads expected by the global-address-space tests.
444 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
446 %0 = load i32 addrspace(2)* %in
447 store i32 %0, i32 addrspace(1)* %out
451 ; Load a f32 value from the constant address space.
452 ; FUNC-LABEL: @load_const_addrspace_f32
453 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
455 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
; Copies one float from the constant address space (addrspace 2) to %out in
; addrspace 1; the checks above match the i32 constant-load expectations.
456 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
457 %1 = load float addrspace(2)* %in
458 store float %1, float addrspace(1)* %out
462 ;===------------------------------------------------------------------------===;
463 ; LOCAL ADDRESS SPACE
464 ;===------------------------------------------------------------------------===;
466 ; Load an i8 value from the local address space.
467 ; FUNC-LABEL: @load_i8_local
468 ; R600-CHECK: LDS_UBYTE_READ_RET
469 ; SI-CHECK-NOT: S_WQM_B64
470 ; SI-CHECK: S_MOV_B32 m0
471 ; SI-CHECK: DS_READ_U8
; Loads one i8 from the local address space (addrspace 3), zero-extends it to
; i32 and stores the result to %out in addrspace 1.
472 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
473 %1 = load i8 addrspace(3)* %in
474 %2 = zext i8 %1 to i32
475 store i32 %2, i32 addrspace(1)* %out
479 ; FUNC-LABEL: @load_i8_sext_local
480 ; R600-CHECK: LDS_UBYTE_READ_RET
482 ; SI-CHECK-NOT: S_WQM_B64
483 ; SI-CHECK: S_MOV_B32 m0
484 ; SI-CHECK: DS_READ_I8
; Loads one i8 from the local address space (addrspace 3), sign-extends it to
; i32 and stores the result to %out in addrspace 1. The SI checks above expect
; the signed DS read form (DS_READ_I8).
485 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
487 %0 = load i8 addrspace(3)* %in
488 %1 = sext i8 %0 to i32
489 store i32 %1, i32 addrspace(1)* %out
493 ; FUNC-LABEL: @load_v2i8_local
494 ; R600-CHECK: LDS_UBYTE_READ_RET
495 ; R600-CHECK: LDS_UBYTE_READ_RET
496 ; SI-CHECK-NOT: S_WQM_B64
497 ; SI-CHECK: S_MOV_B32 m0
498 ; SI-CHECK: DS_READ_U8
499 ; SI-CHECK: DS_READ_U8
; Loads a <2 x i8> from the local address space (addrspace 3), zero-extends
; each lane to i32 and stores the <2 x i32> to %out in addrspace 1.
500 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
502 %0 = load <2 x i8> addrspace(3)* %in
503 %1 = zext <2 x i8> %0 to <2 x i32>
504 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
508 ; FUNC-LABEL: @load_v2i8_sext_local
509 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
510 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
511 ; R600-CHECK-DAG: ASHR
512 ; R600-CHECK-DAG: ASHR
513 ; SI-CHECK-NOT: S_WQM_B64
514 ; SI-CHECK: S_MOV_B32 m0
515 ; SI-CHECK: DS_READ_I8
516 ; SI-CHECK: DS_READ_I8
; Loads a <2 x i8> from the local address space (addrspace 3), sign-extends
; each lane to i32 and stores the <2 x i32> to %out in addrspace 1. The R600
; checks above expect two unsigned LDS byte reads plus two ASHR matches.
517 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
519 %0 = load <2 x i8> addrspace(3)* %in
520 %1 = sext <2 x i8> %0 to <2 x i32>
521 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
525 ; FUNC-LABEL: @load_v4i8_local
526 ; R600-CHECK: LDS_UBYTE_READ_RET
527 ; R600-CHECK: LDS_UBYTE_READ_RET
528 ; R600-CHECK: LDS_UBYTE_READ_RET
529 ; R600-CHECK: LDS_UBYTE_READ_RET
530 ; SI-CHECK-NOT: S_WQM_B64
531 ; SI-CHECK: S_MOV_B32 m0
532 ; SI-CHECK: DS_READ_U8
533 ; SI-CHECK: DS_READ_U8
534 ; SI-CHECK: DS_READ_U8
535 ; SI-CHECK: DS_READ_U8
; Loads a <4 x i8> from the local address space (addrspace 3), zero-extends
; each lane to i32 and stores the <4 x i32> to %out in addrspace 1.
536 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
538 %0 = load <4 x i8> addrspace(3)* %in
539 %1 = zext <4 x i8> %0 to <4 x i32>
540 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
544 ; FUNC-LABEL: @load_v4i8_sext_local
545 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
546 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
547 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
548 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
549 ; R600-CHECK-DAG: ASHR
550 ; R600-CHECK-DAG: ASHR
551 ; R600-CHECK-DAG: ASHR
552 ; R600-CHECK-DAG: ASHR
553 ; SI-CHECK-NOT: S_WQM_B64
554 ; SI-CHECK: S_MOV_B32 m0
555 ; SI-CHECK: DS_READ_I8
556 ; SI-CHECK: DS_READ_I8
557 ; SI-CHECK: DS_READ_I8
558 ; SI-CHECK: DS_READ_I8
; Loads a <4 x i8> from the local address space (addrspace 3), sign-extends
; each lane to i32 and stores the <4 x i32> to %out in addrspace 1. The R600
; checks above expect four unsigned LDS byte reads plus four ASHR matches.
559 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
561 %0 = load <4 x i8> addrspace(3)* %in
562 %1 = sext <4 x i8> %0 to <4 x i32>
563 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
567 ; Load an i16 value from the local address space.
568 ; FUNC-LABEL: @load_i16_local
569 ; R600-CHECK: LDS_USHORT_READ_RET
570 ; SI-CHECK-NOT: S_WQM_B64
571 ; SI-CHECK: S_MOV_B32 m0
572 ; SI-CHECK: DS_READ_U16
; Loads one i16 from the local address space (addrspace 3), zero-extends it to
; i32 and stores the result to %out in addrspace 1.
573 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
575 %0 = load i16 addrspace(3)* %in
576 %1 = zext i16 %0 to i32
577 store i32 %1, i32 addrspace(1)* %out
581 ; FUNC-LABEL: @load_i16_sext_local
582 ; R600-CHECK: LDS_USHORT_READ_RET
584 ; SI-CHECK-NOT: S_WQM_B64
585 ; SI-CHECK: S_MOV_B32 m0
586 ; SI-CHECK: DS_READ_I16
; Loads one i16 from the local address space (addrspace 3), sign-extends it to
; i32 and stores the result to %out in addrspace 1. The SI checks above expect
; the signed DS read form (DS_READ_I16).
587 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
589 %0 = load i16 addrspace(3)* %in
590 %1 = sext i16 %0 to i32
591 store i32 %1, i32 addrspace(1)* %out
595 ; FUNC-LABEL: @load_v2i16_local
596 ; R600-CHECK: LDS_USHORT_READ_RET
597 ; R600-CHECK: LDS_USHORT_READ_RET
598 ; SI-CHECK-NOT: S_WQM_B64
599 ; SI-CHECK: S_MOV_B32 m0
600 ; SI-CHECK: DS_READ_U16
601 ; SI-CHECK: DS_READ_U16
; Loads a <2 x i16> from the local address space (addrspace 3), zero-extends
; each lane to i32 and stores the <2 x i32> to %out in addrspace 1.
602 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
604 %0 = load <2 x i16> addrspace(3)* %in
605 %1 = zext <2 x i16> %0 to <2 x i32>
606 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
610 ; FUNC-LABEL: @load_v2i16_sext_local
611 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
612 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
613 ; R600-CHECK-DAG: ASHR
614 ; R600-CHECK-DAG: ASHR
615 ; SI-CHECK-NOT: S_WQM_B64
616 ; SI-CHECK: S_MOV_B32 m0
617 ; SI-CHECK: DS_READ_I16
618 ; SI-CHECK: DS_READ_I16
; Loads a <2 x i16> from the local address space (addrspace 3), sign-extends
; each lane to i32 and stores the <2 x i32> to %out in addrspace 1. The R600
; checks above expect two unsigned LDS short reads plus two ASHR matches.
619 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
621 %0 = load <2 x i16> addrspace(3)* %in
622 %1 = sext <2 x i16> %0 to <2 x i32>
623 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
627 ; FUNC-LABEL: @load_v4i16_local
628 ; R600-CHECK: LDS_USHORT_READ_RET
629 ; R600-CHECK: LDS_USHORT_READ_RET
630 ; R600-CHECK: LDS_USHORT_READ_RET
631 ; R600-CHECK: LDS_USHORT_READ_RET
632 ; SI-CHECK-NOT: S_WQM_B64
633 ; SI-CHECK: S_MOV_B32 m0
634 ; SI-CHECK: DS_READ_U16
635 ; SI-CHECK: DS_READ_U16
636 ; SI-CHECK: DS_READ_U16
637 ; SI-CHECK: DS_READ_U16
; Loads a <4 x i16> from the local address space (addrspace 3), zero-extends
; each lane to i32 and stores the <4 x i32> to %out in addrspace 1.
638 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
640 %0 = load <4 x i16> addrspace(3)* %in
641 %1 = zext <4 x i16> %0 to <4 x i32>
642 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
646 ; FUNC-LABEL: @load_v4i16_sext_local
647 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
648 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
649 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
650 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
651 ; R600-CHECK-DAG: ASHR
652 ; R600-CHECK-DAG: ASHR
653 ; R600-CHECK-DAG: ASHR
654 ; R600-CHECK-DAG: ASHR
655 ; SI-CHECK-NOT: S_WQM_B64
656 ; SI-CHECK: S_MOV_B32 m0
657 ; SI-CHECK: DS_READ_I16
658 ; SI-CHECK: DS_READ_I16
659 ; SI-CHECK: DS_READ_I16
660 ; SI-CHECK: DS_READ_I16
; Loads a <4 x i16> from the local address space (addrspace 3), sign-extends
; each lane to i32 and stores the <4 x i32> to %out in addrspace 1. The R600
; checks above expect four unsigned LDS short reads plus four ASHR matches.
661 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
663 %0 = load <4 x i16> addrspace(3)* %in
664 %1 = sext <4 x i16> %0 to <4 x i32>
665 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
669 ; Load an i32 value from the local address space.
670 ; FUNC-LABEL: @load_i32_local
671 ; R600-CHECK: LDS_READ_RET
672 ; SI-CHECK-NOT: S_WQM_B64
673 ; SI-CHECK: S_MOV_B32 m0
674 ; SI-CHECK: DS_READ_B32
; Copies one i32 from the local address space (addrspace 3) to %out in
; addrspace 1 with no extension.
675 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
677 %0 = load i32 addrspace(3)* %in
678 store i32 %0, i32 addrspace(1)* %out
682 ; Load a f32 value from the local address space.
683 ; FUNC-LABEL: @load_f32_local
684 ; R600-CHECK: LDS_READ_RET
685 ; SI-CHECK: S_MOV_B32 m0
686 ; SI-CHECK: DS_READ_B32
; Copies one float from the local address space (addrspace 3) to %out in
; addrspace 1. Unlike the integer local tests, no S_WQM_B64 exclusion is
; checked above for this function.
687 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
689 %0 = load float addrspace(3)* %in
690 store float %0, float addrspace(1)* %out
694 ; Load a v2f32 value from the local address space.
695 ; FUNC-LABEL: @load_v2f32_local
696 ; R600-CHECK: LDS_READ_RET
697 ; R600-CHECK: LDS_READ_RET
698 ; SI-CHECK: S_MOV_B32 m0
699 ; SI-CHECK: DS_READ_B32
700 ; SI-CHECK: DS_READ_B32
; Copies a <2 x float> from the local address space (addrspace 3) to %out in
; addrspace 1. The checks above expect two separate 32-bit local reads rather
; than one 64-bit read.
701 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
703 %0 = load <2 x float> addrspace(3)* %in
704 store <2 x float> %0, <2 x float> addrspace(1)* %out