1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
2 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
3 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
4 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
6 ;===------------------------------------------------------------------------===;
7 ; GLOBAL ADDRESS SPACE
8 ;===------------------------------------------------------------------------===;
; Zero-extending i8 load through an addrspace(1) pointer: the byte is widened
; to i32 with zext and stored to %out. The R600 checks expect VTX_READ_8 and
; the SI checks expect buffer_load_ubyte.
10 ; Load an i8 value from the global address space.
11 ; FUNC-LABEL: {{^}}load_i8:
12 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
14 ; SI: buffer_load_ubyte v{{[0-9]+}},
15 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
16 %1 = load i8, i8 addrspace(1)* %in
17 %2 = zext i8 %1 to i32
18 store i32 %2, i32 addrspace(1)* %out
; Sign-extending variant of the i8 addrspace(1) load (sext to i32).
; The R600 checks expect the byte read, then LSHL, then ASHR consuming the
; LSHL result through PV; the SI checks expect buffer_load_sbyte.
22 ; FUNC-LABEL: {{^}}load_i8_sext:
23 ; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
24 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
26 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
28 ; SI: buffer_load_sbyte
29 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
31 %0 = load i8, i8 addrspace(1)* %in
32 %1 = sext i8 %0 to i32
33 store i32 %1, i32 addrspace(1)* %out
; Zero-extending <2 x i8> load from addrspace(1), stored as <2 x i32>.
; The SI checks expect two buffer_load_ubyte instructions.
37 ; FUNC-LABEL: {{^}}load_v2i8:
40 ; SI: buffer_load_ubyte
41 ; SI: buffer_load_ubyte
42 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
44 %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
45 %1 = zext <2 x i8> %0 to <2 x i32>
46 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Sign-extending <2 x i8> load from addrspace(1), stored as <2 x i32>.
; The R600 checks are order-independent: two VTX_READ_8 results, each shifted
; by LSHL and then by ASHR reading the LSHL channel through PV.
; The SI checks expect two buffer_load_sbyte instructions.
50 ; FUNC-LABEL: {{^}}load_v2i8_sext:
51 ; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
52 ; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
53 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
55 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
57 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
59 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
61 ; SI: buffer_load_sbyte
62 ; SI: buffer_load_sbyte
63 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
65 %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
66 %1 = sext <2 x i8> %0 to <2 x i32>
67 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Zero-extending <4 x i8> load from addrspace(1), stored as <4 x i32>.
; The SI checks expect four buffer_load_ubyte instructions.
71 ; FUNC-LABEL: {{^}}load_v4i8:
76 ; SI: buffer_load_ubyte
77 ; SI: buffer_load_ubyte
78 ; SI: buffer_load_ubyte
79 ; SI: buffer_load_ubyte
80 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
82 %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
83 %1 = zext <4 x i8> %0 to <4 x i32>
84 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Sign-extending <4 x i8> load from addrspace(1), stored as <4 x i32>.
; The R600 checks are order-independent: four VTX_READ_8 results (X/Y/Z/W
; captures), each followed by an LSHL and an ASHR that reads the LSHL channel
; through PV. The SI checks expect four buffer_load_sbyte instructions.
88 ; FUNC-LABEL: {{^}}load_v4i8_sext:
89 ; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
90 ; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
91 ; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
92 ; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
93 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
95 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
97 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
99 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
101 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
103 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
105 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
107 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
109 ; SI: buffer_load_sbyte
110 ; SI: buffer_load_sbyte
111 ; SI: buffer_load_sbyte
112 ; SI: buffer_load_sbyte
113 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
115 %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
116 %1 = sext <4 x i8> %0 to <4 x i32>
117 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Zero-extending i16 load through an addrspace(1) pointer, stored as i32.
; The R600 checks expect VTX_READ_16; the SI checks expect buffer_load_ushort.
121 ; Load an i16 value from the global address space.
122 ; FUNC-LABEL: {{^}}load_i16:
123 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
124 ; SI: buffer_load_ushort
125 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
127 %0 = load i16 , i16 addrspace(1)* %in
128 %1 = zext i16 %0 to i32
129 store i32 %1, i32 addrspace(1)* %out
; Sign-extending variant of the i16 addrspace(1) load (sext to i32).
; The R600 checks expect VTX_READ_16, then LSHL, then ASHR consuming the LSHL
; result through PV; the SI checks expect buffer_load_sshort.
133 ; FUNC-LABEL: {{^}}load_i16_sext:
134 ; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
135 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
137 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
139 ; SI: buffer_load_sshort
140 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
142 %0 = load i16, i16 addrspace(1)* %in
143 %1 = sext i16 %0 to i32
144 store i32 %1, i32 addrspace(1)* %out
; Zero-extending <2 x i16> load from addrspace(1), stored as <2 x i32>.
; The SI checks expect two buffer_load_ushort instructions.
148 ; FUNC-LABEL: {{^}}load_v2i16:
151 ; SI: buffer_load_ushort
152 ; SI: buffer_load_ushort
153 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
155 %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
156 %1 = zext <2 x i16> %0 to <2 x i32>
157 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Sign-extending <2 x i16> load from addrspace(1), stored as <2 x i32>.
; The R600 checks are order-independent: two VTX_READ_16 results, each shifted
; by LSHL and then by ASHR reading the LSHL channel through PV.
; The SI checks expect two buffer_load_sshort instructions.
161 ; FUNC-LABEL: {{^}}load_v2i16_sext:
162 ; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
163 ; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
164 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
166 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
168 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
170 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
172 ; SI: buffer_load_sshort
173 ; SI: buffer_load_sshort
174 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
176 %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
177 %1 = sext <2 x i16> %0 to <2 x i32>
178 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Zero-extending <4 x i16> load from addrspace(1), stored as <4 x i32>.
; The SI checks expect four buffer_load_ushort instructions.
182 ; FUNC-LABEL: {{^}}load_v4i16:
187 ; SI: buffer_load_ushort
188 ; SI: buffer_load_ushort
189 ; SI: buffer_load_ushort
190 ; SI: buffer_load_ushort
191 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
193 %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
194 %1 = zext <4 x i16> %0 to <4 x i32>
195 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Sign-extending <4 x i16> load from addrspace(1), stored as <4 x i32>.
; The R600 checks are order-independent: four VTX_READ_16 results (X/Y/Z/W
; captures), each followed by an LSHL and an ASHR that reads the LSHL channel
; through PV. The SI checks expect four buffer_load_sshort instructions.
199 ; FUNC-LABEL: {{^}}load_v4i16_sext:
200 ; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
201 ; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
202 ; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
203 ; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
204 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
206 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
208 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
210 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
212 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
214 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
216 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
218 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
220 ; SI: buffer_load_sshort
221 ; SI: buffer_load_sshort
222 ; SI: buffer_load_sshort
223 ; SI: buffer_load_sshort
224 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
226 %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
227 %1 = sext <4 x i16> %0 to <4 x i32>
228 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Plain i32 copy from %in to %out, both addrspace(1), with no extension.
; The R600 checks expect VTX_READ_32 ending in a literal 0 operand; the SI
; checks expect buffer_load_dword.
232 ; Load an i32 value from the global address space.
233 ; FUNC-LABEL: {{^}}load_i32:
234 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
236 ; SI: buffer_load_dword v{{[0-9]+}}
237 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
239 %0 = load i32, i32 addrspace(1)* %in
240 store i32 %0, i32 addrspace(1)* %out
; Plain float copy from %in to %out, both addrspace(1).
; The expected instructions are the same as in the i32 case: VTX_READ_32 for
; the R600 checks and buffer_load_dword for the SI checks.
244 ; Load an f32 value from the global address space.
245 ; FUNC-LABEL: {{^}}load_f32:
246 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
248 ; SI: buffer_load_dword v{{[0-9]+}}
249 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
251 %0 = load float, float addrspace(1)* %in
252 store float %0, float addrspace(1)* %out
; <2 x float> copy from %in to %out, both addrspace(1).
; The SI checks expect a single buffer_load_dwordx2.
256 ; Load a v2f32 value from the global address space.
257 ; FUNC-LABEL: {{^}}load_v2f32:
260 ; SI: buffer_load_dwordx2
261 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
263 %0 = load <2 x float>, <2 x float> addrspace(1)* %in
264 store <2 x float> %0, <2 x float> addrspace(1)* %out
; i64 copy from %in to %out, both addrspace(1).
; The SI checks expect a single buffer_load_dwordx2.
268 ; FUNC-LABEL: {{^}}load_i64:
270 ; SI: buffer_load_dwordx2
271 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
273 %0 = load i64, i64 addrspace(1)* %in
274 store i64 %0, i64 addrspace(1)* %out
; Loads an i32 from addrspace(1), sign-extends it to i64 and stores the i64.
; Note that the memory access itself is 32 bits wide; only the stored value is
; i64. The R600 checks expect an ASHR with a literal.x operand; the SI checks
; expect buffer_load_dword.
278 ; FUNC-LABEL: {{^}}load_i64_sext:
281 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
283 ; SI: buffer_load_dword
285 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
287 %0 = load i32, i32 addrspace(1)* %in
288 %1 = sext i32 %0 to i64
289 store i64 %1, i64 addrspace(1)* %out
; Loads an i32 from addrspace(1), zero-extends it to i64 and stores the i64.
; Only the function label is checked in the lines visible here; no
; target-specific instruction pattern is asserted.
293 ; FUNC-LABEL: {{^}}load_i64_zext:
296 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
298 %0 = load i32, i32 addrspace(1)* %in
299 %1 = zext i32 %0 to i64
300 store i64 %1, i64 addrspace(1)* %out
; <8 x i32> copy from %in to %out, both addrspace(1).
; The SI checks currently expect eight separate buffer_load_dword matches; the
; XXX note below records that wider DWORDX4 loads would be preferable.
304 ; FUNC-LABEL: {{^}}load_v8i32:
307 ; XXX: We should be using DWORDX4 instructions on SI.
308 ; SI: buffer_load_dword
309 ; SI: buffer_load_dword
310 ; SI: buffer_load_dword
311 ; SI: buffer_load_dword
312 ; SI: buffer_load_dword
313 ; SI: buffer_load_dword
314 ; SI: buffer_load_dword
315 ; SI: buffer_load_dword
316 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
318 %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
319 store <8 x i32> %0, <8 x i32> addrspace(1)* %out
; <16 x i32> copy from %in to %out, both addrspace(1).
; The SI checks currently expect sixteen separate buffer_load_dword matches;
; the XXX note below records that wider DWORDX4 loads would be preferable.
323 ; FUNC-LABEL: {{^}}load_v16i32:
328 ; XXX: We should be using DWORDX4 instructions on SI.
329 ; SI: buffer_load_dword
330 ; SI: buffer_load_dword
331 ; SI: buffer_load_dword
332 ; SI: buffer_load_dword
333 ; SI: buffer_load_dword
334 ; SI: buffer_load_dword
335 ; SI: buffer_load_dword
336 ; SI: buffer_load_dword
337 ; SI: buffer_load_dword
338 ; SI: buffer_load_dword
339 ; SI: buffer_load_dword
340 ; SI: buffer_load_dword
341 ; SI: buffer_load_dword
342 ; SI: buffer_load_dword
343 ; SI: buffer_load_dword
344 ; SI: buffer_load_dword
345 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
347 %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
348 store <16 x i32> %0, <16 x i32> addrspace(1)* %out
352 ;===------------------------------------------------------------------------===;
353 ; CONSTANT ADDRESS SPACE
354 ;===------------------------------------------------------------------------===;
; Reads an i8 through an addrspace(2) pointer, sign-extends it to i32 and
; stores the result to the addrspace(1) pointer %out.
; The R600 checks expect VTX_READ_8, then LSHL, then ASHR on the LSHL result
; through PV; the SI checks expect buffer_load_sbyte.
356 ; Load a sign-extended i8 value
357 ; FUNC-LABEL: {{^}}load_const_i8_sext:
358 ; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
359 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
361 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
363 ; SI: buffer_load_sbyte v{{[0-9]+}},
364 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
366 %0 = load i8, i8 addrspace(2)* %in
367 %1 = sext i8 %0 to i32
368 store i32 %1, i32 addrspace(1)* %out
; Reads the i8 directly at %in (addrspace(2), no pointer offset), zero-extends
; it to i32 and stores it to %out.
; The R600 checks expect VTX_READ_8; the SI checks expect buffer_load_ubyte.
372 ; Load an aligned i8 value
373 ; FUNC-LABEL: {{^}}load_const_i8_aligned:
374 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
375 ; SI: buffer_load_ubyte v{{[0-9]+}},
376 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
378 %0 = load i8, i8 addrspace(2)* %in
379 %1 = zext i8 %0 to i32
380 store i32 %1, i32 addrspace(1)* %out
; Same as the aligned i8 case, except the address is first advanced by one i8
; element with getelementptr, so the byte at %in + 1 is read.
; The expected instructions are unchanged: VTX_READ_8 / buffer_load_ubyte.
384 ; Load an un-aligned i8 value
385 ; FUNC-LABEL: {{^}}load_const_i8_unaligned:
386 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
387 ; SI: buffer_load_ubyte v{{[0-9]+}},
388 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
390 %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
391 %1 = load i8, i8 addrspace(2)* %0
392 %2 = zext i8 %1 to i32
393 store i32 %2, i32 addrspace(1)* %out
; Reads an i16 through an addrspace(2) pointer, sign-extends it to i32 and
; stores the result to %out.
; The R600 checks expect VTX_READ_16, then LSHL, then ASHR on the LSHL result
; through PV; the SI checks expect buffer_load_sshort.
397 ; Load a sign-extended i16 value
398 ; FUNC-LABEL: {{^}}load_const_i16_sext:
399 ; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
400 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
402 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
404 ; SI: buffer_load_sshort
405 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
407 %0 = load i16, i16 addrspace(2)* %in
408 %1 = sext i16 %0 to i32
409 store i32 %1, i32 addrspace(1)* %out
; Reads the i16 directly at %in (addrspace(2), no pointer offset), zero-extends
; it to i32 and stores it to %out.
; The R600 checks expect VTX_READ_16; the SI checks expect buffer_load_ushort.
413 ; Load an aligned i16 value
414 ; FUNC-LABEL: {{^}}load_const_i16_aligned:
415 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
416 ; SI: buffer_load_ushort
417 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
419 %0 = load i16, i16 addrspace(2)* %in
420 %1 = zext i16 %0 to i32
421 store i32 %1, i32 addrspace(1)* %out
; Same as the aligned i16 case, except the address is first advanced by one
; i16 element with getelementptr before the load.
; NOTE(review): index 1 of an i16 pointer is still a multiple of the element
; size, so "un-aligned" presumably means "not dword-aligned" here - confirm.
425 ; Load an un-aligned i16 value
426 ; FUNC-LABEL: {{^}}load_const_i16_unaligned:
427 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
428 ; SI: buffer_load_ushort
429 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
431 %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
432 %1 = load i16, i16 addrspace(2)* %0
433 %2 = zext i16 %1 to i32
434 store i32 %2, i32 addrspace(1)* %out
; i32 copy from an addrspace(2) pointer to the addrspace(1) pointer %out.
; The R600 checks expect VTX_READ_32; the SI checks expect s_load_dword with an
; s-register destination rather than a buffer load.
438 ; Load an i32 value from the constant address space.
439 ; FUNC-LABEL: {{^}}load_const_addrspace_i32:
440 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
442 ; SI: s_load_dword s{{[0-9]+}}
443 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
445 %0 = load i32, i32 addrspace(2)* %in
446 store i32 %0, i32 addrspace(1)* %out
; float copy from an addrspace(2) pointer to the addrspace(1) pointer %out.
; The expected instructions are the same as in the i32 case: VTX_READ_32 for
; the R600 checks and s_load_dword for the SI checks.
450 ; Load an f32 value from the constant address space.
451 ; FUNC-LABEL: {{^}}load_const_addrspace_f32:
452 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
454 ; SI: s_load_dword s{{[0-9]+}}
455 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
456 %1 = load float, float addrspace(2)* %in
457 store float %1, float addrspace(1)* %out
461 ;===------------------------------------------------------------------------===;
462 ; LOCAL ADDRESS SPACE
463 ;===------------------------------------------------------------------------===;
; Zero-extending i8 load through an addrspace(3) pointer; the i32 result is
; stored to the addrspace(1) pointer %out.
; Only an R600 pattern is visible here: LDS_UBYTE_READ_RET.
465 ; Load an i8 value from the local address space.
466 ; FUNC-LABEL: {{^}}load_i8_local:
467 ; R600: LDS_UBYTE_READ_RET
471 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
472 %1 = load i8, i8 addrspace(3)* %in
473 %2 = zext i8 %1 to i32
474 store i32 %2, i32 addrspace(1)* %out
; Sign-extending i8 load through an addrspace(3) pointer, stored as i32.
; The visible R600 pattern still expects the unsigned read
; LDS_UBYTE_READ_RET, even though the IR uses sext.
478 ; FUNC-LABEL: {{^}}load_i8_sext_local:
479 ; R600: LDS_UBYTE_READ_RET
484 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
486 %0 = load i8, i8 addrspace(3)* %in
487 %1 = sext i8 %0 to i32
488 store i32 %1, i32 addrspace(1)* %out
; Zero-extending <2 x i8> load from addrspace(3), stored as <2 x i32>.
; The visible R600 checks expect two LDS_UBYTE_READ_RET instructions in order.
492 ; FUNC-LABEL: {{^}}load_v2i8_local:
493 ; R600: LDS_UBYTE_READ_RET
494 ; R600: LDS_UBYTE_READ_RET
499 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
501 %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
502 %1 = zext <2 x i8> %0 to <2 x i32>
503 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Sign-extending <2 x i8> load from addrspace(3), stored as <2 x i32>.
; The visible R600 checks expect two LDS_UBYTE_READ_RET instructions, matched
; order-independently.
507 ; FUNC-LABEL: {{^}}load_v2i8_sext_local:
508 ; R600-DAG: LDS_UBYTE_READ_RET
509 ; R600-DAG: LDS_UBYTE_READ_RET
516 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
518 %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
519 %1 = sext <2 x i8> %0 to <2 x i32>
520 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Zero-extending <4 x i8> load from addrspace(3), stored as <4 x i32>.
; The visible R600 checks expect four LDS_UBYTE_READ_RET instructions in order.
524 ; FUNC-LABEL: {{^}}load_v4i8_local:
525 ; R600: LDS_UBYTE_READ_RET
526 ; R600: LDS_UBYTE_READ_RET
527 ; R600: LDS_UBYTE_READ_RET
528 ; R600: LDS_UBYTE_READ_RET
535 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
537 %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
538 %1 = zext <4 x i8> %0 to <4 x i32>
539 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Sign-extending <4 x i8> load from addrspace(3), stored as <4 x i32>.
; The visible R600 checks expect four LDS_UBYTE_READ_RET instructions, matched
; order-independently.
543 ; FUNC-LABEL: {{^}}load_v4i8_sext_local:
544 ; R600-DAG: LDS_UBYTE_READ_RET
545 ; R600-DAG: LDS_UBYTE_READ_RET
546 ; R600-DAG: LDS_UBYTE_READ_RET
547 ; R600-DAG: LDS_UBYTE_READ_RET
558 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
560 %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
561 %1 = sext <4 x i8> %0 to <4 x i32>
562 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Zero-extending i16 load through an addrspace(3) pointer; the i32 result is
; stored to the addrspace(1) pointer %out.
; Only an R600 pattern is visible here: LDS_USHORT_READ_RET.
566 ; Load an i16 value from the local address space.
567 ; FUNC-LABEL: {{^}}load_i16_local:
568 ; R600: LDS_USHORT_READ_RET
572 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
574 %0 = load i16 , i16 addrspace(3)* %in
575 %1 = zext i16 %0 to i32
576 store i32 %1, i32 addrspace(1)* %out
; Sign-extending i16 load through an addrspace(3) pointer, stored as i32.
; The visible R600 pattern still expects the unsigned read
; LDS_USHORT_READ_RET, even though the IR uses sext.
580 ; FUNC-LABEL: {{^}}load_i16_sext_local:
581 ; R600: LDS_USHORT_READ_RET
586 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
588 %0 = load i16, i16 addrspace(3)* %in
589 %1 = sext i16 %0 to i32
590 store i32 %1, i32 addrspace(1)* %out
; Zero-extending <2 x i16> load from addrspace(3), stored as <2 x i32>.
; The visible R600 checks expect two LDS_USHORT_READ_RET instructions in order.
594 ; FUNC-LABEL: {{^}}load_v2i16_local:
595 ; R600: LDS_USHORT_READ_RET
596 ; R600: LDS_USHORT_READ_RET
601 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
603 %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
604 %1 = zext <2 x i16> %0 to <2 x i32>
605 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Sign-extending <2 x i16> load from addrspace(3), stored as <2 x i32>.
; The visible R600 checks expect two LDS_USHORT_READ_RET instructions, matched
; order-independently.
609 ; FUNC-LABEL: {{^}}load_v2i16_sext_local:
610 ; R600-DAG: LDS_USHORT_READ_RET
611 ; R600-DAG: LDS_USHORT_READ_RET
618 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
620 %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
621 %1 = sext <2 x i16> %0 to <2 x i32>
622 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Zero-extending <4 x i16> load from addrspace(3), stored as <4 x i32>.
; The visible R600 checks expect four LDS_USHORT_READ_RET instructions in
; order.
626 ; FUNC-LABEL: {{^}}load_v4i16_local:
627 ; R600: LDS_USHORT_READ_RET
628 ; R600: LDS_USHORT_READ_RET
629 ; R600: LDS_USHORT_READ_RET
630 ; R600: LDS_USHORT_READ_RET
637 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
639 %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
640 %1 = zext <4 x i16> %0 to <4 x i32>
641 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Sign-extending <4 x i16> load from addrspace(3), stored as <4 x i32>.
; The visible R600 checks expect four LDS_USHORT_READ_RET instructions,
; matched order-independently.
645 ; FUNC-LABEL: {{^}}load_v4i16_sext_local:
646 ; R600-DAG: LDS_USHORT_READ_RET
647 ; R600-DAG: LDS_USHORT_READ_RET
648 ; R600-DAG: LDS_USHORT_READ_RET
649 ; R600-DAG: LDS_USHORT_READ_RET
660 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
662 %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
663 %1 = sext <4 x i16> %0 to <4 x i32>
664 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; i32 copy from an addrspace(3) pointer to the addrspace(1) pointer %out.
; Only the function label is checked in the lines visible here.
668 ; Load an i32 value from the local address space.
669 ; FUNC-LABEL: {{^}}load_i32_local:
674 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
676 %0 = load i32, i32 addrspace(3)* %in
677 store i32 %0, i32 addrspace(1)* %out
; float copy from an addrspace(3) pointer to the addrspace(1) pointer %out.
; Only the function label is checked in the lines visible here.
681 ; Load an f32 value from the local address space.
682 ; FUNC-LABEL: {{^}}load_f32_local:
686 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
688 %0 = load float, float addrspace(3)* %in
689 store float %0, float addrspace(1)* %out
; <2 x float> copy from an addrspace(3) pointer to the addrspace(1) pointer
; %out. Only the function label is checked in the lines visible here.
693 ; Load a v2f32 value from the local address space.
694 ; FUNC-LABEL: {{^}}load_v2f32_local:
699 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
701 %0 = load <2 x float>, <2 x float> addrspace(3)* %in
702 store <2 x float> %0, <2 x float> addrspace(1)* %out
; Two addrspace(3) loads derived from the same pointer %in:
;   - %scalar is the i32 at %in itself;
;   - %vec0 is a <2 x i32> read (align 4) from %in reinterpreted as a
;     <2 x i32> pointer and advanced by two vector elements.
; The scalar is placed in lane 0 of an otherwise-zero vector, added to %vec0,
; and the sum is stored to %out. The SI checks expect one ds_read_b32 and one
; ds_read2_b32, in either order.
706 ; Test loading an i32 and a v2i32 value from the same base pointer.
707 ; FUNC-LABEL: {{^}}load_i32_v2i32_local:
711 ; SI-DAG: ds_read_b32
712 ; SI-DAG: ds_read2_b32
713 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
714 %scalar = load i32, i32 addrspace(3)* %in
715 %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
716 %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
717 %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
718 %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
719 %vec = add <2 x i32> %vec0, %vec1
720 store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
; 512-element i32 array in addrspace(3); load_i32_local_const_ptr reads
; element 1 of it through a constant getelementptr.
725 @lds = addrspace(3) global [512 x i32] undef, align 4
727 ; On SI we need to make sure that the base offset is a register and not an immediate.
; The SI checks therefore expect a zero to be materialized with v_mov_b32 and
; that register to be used as the ds_read_b32 address, with the element offset
; carried in the offset:4 field. The destination register of the read is
; matched with a regex rather than a fixed number, because this test is about
; the address operand and should not depend on register allocation.
; The loaded value is stored to element 1 of %out; %in is unused.
729 ; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
730 ; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
731 ; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
733 define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
735 %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
736 %tmp1 = load i32, i32 addrspace(3)* %tmp0
737 %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
738 store i32 %tmp1, i32 addrspace(1)* %tmp2