R600: Modernize test
[oota-llvm.git] / test / CodeGen / R600 / llvm.memcpy.ll
1 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; memcpy intrinsic variants exercised below: addrspace(3) pointers with an i32 length, and addrspace(1) pointers with an i64 length.
3 declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
4 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
5
6
7 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align1
8 ; SI: DS_READ_U8
9 ; SI: DS_WRITE_B8
10 ; SI: DS_READ_U8
11 ; SI: DS_WRITE_B8
12 ; SI: DS_READ_U8
13 ; SI: DS_WRITE_B8
14 ; SI: DS_READ_U8
15 ; SI: DS_WRITE_B8
16 ; SI: DS_READ_U8
17 ; SI: DS_WRITE_B8
18 ; SI: DS_READ_U8
19 ; SI: DS_WRITE_B8
20 ; SI: DS_READ_U8
21 ; SI: DS_WRITE_B8
22 ; SI: DS_READ_U8
23 ; SI: DS_WRITE_B8
24
25 ; SI: DS_READ_U8
26 ; SI: DS_WRITE_B8
27 ; SI: DS_READ_U8
28 ; SI: DS_WRITE_B8
29 ; SI: DS_READ_U8
30 ; SI: DS_WRITE_B8
31 ; SI: DS_READ_U8
32 ; SI: DS_WRITE_B8
33 ; SI: DS_READ_U8
34 ; SI: DS_WRITE_B8
35 ; SI: DS_READ_U8
36 ; SI: DS_WRITE_B8
37 ; SI: DS_READ_U8
38 ; SI: DS_WRITE_B8
39 ; SI: DS_READ_U8
40 ; SI: DS_WRITE_B8
41
42 ; SI: DS_READ_U8
43 ; SI: DS_WRITE_B8
44 ; SI: DS_READ_U8
45 ; SI: DS_WRITE_B8
46 ; SI: DS_READ_U8
47 ; SI: DS_WRITE_B8
48 ; SI: DS_READ_U8
49 ; SI: DS_WRITE_B8
50 ; SI: DS_READ_U8
51 ; SI: DS_WRITE_B8
52 ; SI: DS_READ_U8
53 ; SI: DS_WRITE_B8
54 ; SI: DS_READ_U8
55 ; SI: DS_WRITE_B8
56 ; SI: DS_READ_U8
57 ; SI: DS_WRITE_B8
58
59 ; SI: DS_READ_U8
60 ; SI: DS_WRITE_B8
61 ; SI: DS_READ_U8
62 ; SI: DS_WRITE_B8
63 ; SI: DS_READ_U8
64 ; SI: DS_WRITE_B8
65 ; SI: DS_READ_U8
66 ; SI: DS_WRITE_B8
67 ; SI: DS_READ_U8
68 ; SI: DS_WRITE_B8
69 ; SI: DS_READ_U8
70 ; SI: DS_WRITE_B8
71 ; SI: DS_READ_U8
72 ; SI: DS_WRITE_B8
73 ; SI: DS_READ_U8
74 ; SI: DS_WRITE_B8
75
76 ; SI: S_ENDPGM
77 define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
78   %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* ; i8 view of the destination, as the intrinsic takes i8 pointers
79   %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* ; i8 view of the source
80   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 1, i1 false) nounwind ; 32-byte copy, alignment operand 1
81   ret void
82 }
83
84 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align2
85 ; SI: DS_READ_U16
86 ; SI: DS_WRITE_B16
87 ; SI: DS_READ_U16
88 ; SI: DS_WRITE_B16
89 ; SI: DS_READ_U16
90 ; SI: DS_WRITE_B16
91 ; SI: DS_READ_U16
92 ; SI: DS_WRITE_B16
93 ; SI: DS_READ_U16
94 ; SI: DS_WRITE_B16
95 ; SI: DS_READ_U16
96 ; SI: DS_WRITE_B16
97 ; SI: DS_READ_U16
98 ; SI: DS_WRITE_B16
99 ; SI: DS_READ_U16
100 ; SI: DS_WRITE_B16
101
102 ; SI: DS_READ_U16
103 ; SI: DS_WRITE_B16
104 ; SI: DS_READ_U16
105 ; SI: DS_WRITE_B16
106 ; SI: DS_READ_U16
107 ; SI: DS_WRITE_B16
108 ; SI: DS_READ_U16
109 ; SI: DS_WRITE_B16
110 ; SI: DS_READ_U16
111 ; SI: DS_WRITE_B16
112 ; SI: DS_READ_U16
113 ; SI: DS_WRITE_B16
114 ; SI: DS_READ_U16
115 ; SI: DS_WRITE_B16
116 ; SI: DS_READ_U16
117 ; SI: DS_WRITE_B16
118
119 ; SI: S_ENDPGM
120 define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
121   %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* ; i8 view of the destination, as the intrinsic takes i8 pointers
122   %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* ; i8 view of the source
123   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 2, i1 false) nounwind ; 32-byte copy, alignment operand 2
124   ret void
125 }
126
127 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align4
128 ; SI-DAG: DS_READ_B32
129 ; SI-DAG: DS_WRITE_B32
130
131 ; SI-DAG: DS_READ_B32
132 ; SI-DAG: DS_WRITE_B32
133
134 ; SI-DAG: DS_READ_B32
135 ; SI-DAG: DS_WRITE_B32
136
137 ; SI-DAG: DS_READ_B32
138 ; SI-DAG: DS_WRITE_B32
139
140 ; SI-DAG: DS_READ_B32
141 ; SI-DAG: DS_WRITE_B32
142
143 ; SI-DAG: DS_READ_B32
144 ; SI-DAG: DS_WRITE_B32
145
146 ; SI-DAG: DS_READ_B32
147 ; SI-DAG: DS_WRITE_B32
148
149 ; SI-DAG: DS_READ_B32
150 ; SI-DAG: DS_WRITE_B32
151
152 ; A 32-byte copy in 4-byte accesses is eight read/write pairs, checked above in
153 ; the order-independent form; the program end must follow all of them.
154
155 ; SI: S_ENDPGM
156 define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
157   %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* ; i8 view of the source, as the intrinsic takes i8 pointers
158   %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* ; i8 view of the destination
159   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind ; 32-byte copy, alignment operand 4
160   ret void
161 }
162
163 ; FIXME: Use 64-bit ops
164 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align8
165
166 ; SI-DAG: DS_READ_B32
167 ; SI-DAG: DS_WRITE_B32
168
169 ; SI-DAG: DS_READ_B32
170 ; SI-DAG: DS_WRITE_B32
171
172 ; SI-DAG: DS_READ_B32
173 ; SI-DAG: DS_WRITE_B32
174
175 ; SI-DAG: DS_READ_B32
176 ; SI-DAG: DS_WRITE_B32
177
178 ; SI-DAG: DS_READ_B32
179 ; SI-DAG: DS_WRITE_B32
180
181 ; SI-DAG: DS_READ_B32
182 ; SI-DAG: DS_WRITE_B32
183
184 ; SI-DAG: DS_READ_B32
185 ; SI-DAG: DS_WRITE_B32
186
187 ; SI-DAG: DS_READ_B32
188 ; SI-DAG: DS_WRITE_B32
189
190 ; Until 64-bit ops are used, the 32-byte copy is expected as eight 4-byte
191 ; read/write pairs in any interleaving; the program end must follow all of them.
192
193 ; SI: S_ENDPGM
194 define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
195   %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* ; i8 view of the source, as the intrinsic takes i8 pointers
196   %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* ; i8 view of the destination
197   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind ; 32-byte copy, alignment operand 8
198   ret void
199 }
200
201 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align1
202 ; SI-DAG: BUFFER_LOAD_UBYTE
203 ; SI-DAG: BUFFER_STORE_BYTE
204 ; SI-DAG: BUFFER_LOAD_UBYTE
205 ; SI-DAG: BUFFER_STORE_BYTE
206 ; SI-DAG: BUFFER_LOAD_UBYTE
207 ; SI-DAG: BUFFER_STORE_BYTE
208 ; SI-DAG: BUFFER_LOAD_UBYTE
209 ; SI-DAG: BUFFER_STORE_BYTE
210 ; SI-DAG: BUFFER_LOAD_UBYTE
211 ; SI-DAG: BUFFER_STORE_BYTE
212 ; SI-DAG: BUFFER_LOAD_UBYTE
213 ; SI-DAG: BUFFER_STORE_BYTE
214 ; SI-DAG: BUFFER_LOAD_UBYTE
215 ; SI-DAG: BUFFER_STORE_BYTE
216 ; SI-DAG: BUFFER_LOAD_UBYTE
217 ; SI-DAG: BUFFER_STORE_BYTE
218
219 ; SI-DAG: BUFFER_LOAD_UBYTE
220 ; SI-DAG: BUFFER_STORE_BYTE
221 ; SI-DAG: BUFFER_LOAD_UBYTE
222 ; SI-DAG: BUFFER_STORE_BYTE
223 ; SI-DAG: BUFFER_LOAD_UBYTE
224 ; SI-DAG: BUFFER_STORE_BYTE
225 ; SI-DAG: BUFFER_LOAD_UBYTE
226 ; SI-DAG: BUFFER_STORE_BYTE
227 ; SI-DAG: BUFFER_LOAD_UBYTE
228 ; SI-DAG: BUFFER_STORE_BYTE
229 ; SI-DAG: BUFFER_LOAD_UBYTE
230 ; SI-DAG: BUFFER_STORE_BYTE
231 ; SI-DAG: BUFFER_LOAD_UBYTE
232 ; SI-DAG: BUFFER_STORE_BYTE
233 ; SI-DAG: BUFFER_LOAD_UBYTE
234 ; SI-DAG: BUFFER_STORE_BYTE
235
236 ; SI-DAG: BUFFER_LOAD_UBYTE
237 ; SI-DAG: BUFFER_STORE_BYTE
238 ; SI-DAG: BUFFER_LOAD_UBYTE
239 ; SI-DAG: BUFFER_STORE_BYTE
240 ; SI-DAG: BUFFER_LOAD_UBYTE
241 ; SI-DAG: BUFFER_STORE_BYTE
242 ; SI-DAG: BUFFER_LOAD_UBYTE
243 ; SI-DAG: BUFFER_STORE_BYTE
244 ; SI-DAG: BUFFER_LOAD_UBYTE
245 ; SI-DAG: BUFFER_STORE_BYTE
246 ; SI-DAG: BUFFER_LOAD_UBYTE
247 ; SI-DAG: BUFFER_STORE_BYTE
248 ; SI-DAG: BUFFER_LOAD_UBYTE
249 ; SI-DAG: BUFFER_STORE_BYTE
250 ; SI-DAG: BUFFER_LOAD_UBYTE
251 ; SI-DAG: BUFFER_STORE_BYTE
252
253 ; SI-DAG: BUFFER_LOAD_UBYTE
254 ; SI-DAG: BUFFER_STORE_BYTE
255 ; SI-DAG: BUFFER_LOAD_UBYTE
256 ; SI-DAG: BUFFER_STORE_BYTE
257 ; SI-DAG: BUFFER_LOAD_UBYTE
258 ; SI-DAG: BUFFER_STORE_BYTE
259 ; SI-DAG: BUFFER_LOAD_UBYTE
260 ; SI-DAG: BUFFER_STORE_BYTE
261 ; SI-DAG: BUFFER_LOAD_UBYTE
262 ; SI-DAG: BUFFER_STORE_BYTE
263 ; SI-DAG: BUFFER_LOAD_UBYTE
264 ; SI-DAG: BUFFER_STORE_BYTE
265 ; SI-DAG: BUFFER_LOAD_UBYTE
266 ; SI-DAG: BUFFER_STORE_BYTE
267 ; SI-DAG: BUFFER_LOAD_UBYTE
268 ; SI-DAG: BUFFER_STORE_BYTE
269
270 ; SI: S_ENDPGM
271 define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
272   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; i8 view of the destination, as the intrinsic takes i8 pointers
273   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* ; i8 view of the source
274   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 1, i1 false) nounwind ; 32-byte copy, alignment operand 1
275   ret void
276 }
277
278 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align2
279 ; SI-DAG: BUFFER_LOAD_USHORT
280 ; SI-DAG: BUFFER_STORE_SHORT
281 ; SI-DAG: BUFFER_LOAD_USHORT
282 ; SI-DAG: BUFFER_STORE_SHORT
283 ; SI-DAG: BUFFER_LOAD_USHORT
284 ; SI-DAG: BUFFER_STORE_SHORT
285 ; SI-DAG: BUFFER_LOAD_USHORT
286 ; SI-DAG: BUFFER_STORE_SHORT
287 ; SI-DAG: BUFFER_LOAD_USHORT
288 ; SI-DAG: BUFFER_STORE_SHORT
289 ; SI-DAG: BUFFER_LOAD_USHORT
290 ; SI-DAG: BUFFER_STORE_SHORT
291 ; SI-DAG: BUFFER_LOAD_USHORT
292 ; SI-DAG: BUFFER_STORE_SHORT
293 ; SI-DAG: BUFFER_LOAD_USHORT
294 ; SI-DAG: BUFFER_STORE_SHORT
295
296 ; SI-DAG: BUFFER_LOAD_USHORT
297 ; SI-DAG: BUFFER_STORE_SHORT
298 ; SI-DAG: BUFFER_LOAD_USHORT
299 ; SI-DAG: BUFFER_STORE_SHORT
300 ; SI-DAG: BUFFER_LOAD_USHORT
301 ; SI-DAG: BUFFER_STORE_SHORT
302 ; SI-DAG: BUFFER_LOAD_USHORT
303 ; SI-DAG: BUFFER_STORE_SHORT
304 ; SI-DAG: BUFFER_LOAD_USHORT
305 ; SI-DAG: BUFFER_STORE_SHORT
306 ; SI-DAG: BUFFER_LOAD_USHORT
307 ; SI-DAG: BUFFER_STORE_SHORT
308 ; SI-DAG: BUFFER_LOAD_USHORT
309 ; SI-DAG: BUFFER_STORE_SHORT
310 ; SI-DAG: BUFFER_LOAD_USHORT
311 ; SI-DAG: BUFFER_STORE_SHORT
312
313 ; SI: S_ENDPGM
314 define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
315   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; i8 view of the destination, as the intrinsic takes i8 pointers
316   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* ; i8 view of the source
317   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 2, i1 false) nounwind ; 32-byte copy, alignment operand 2
318   ret void
319 }
320
321 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align4
322 ; SI: BUFFER_LOAD_DWORDX4
323 ; SI: BUFFER_STORE_DWORDX4
324 ; SI: BUFFER_LOAD_DWORDX4
325 ; SI: BUFFER_STORE_DWORDX4
326 ; SI: S_ENDPGM
327 define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
328   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; i8 view of the destination, as the intrinsic takes i8 pointers
329   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* ; i8 view of the source
330   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 4, i1 false) nounwind ; 32-byte copy, alignment operand 4
331   ret void
332 }
333
334 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align8
335 ; SI: BUFFER_LOAD_DWORDX4
336 ; SI: BUFFER_STORE_DWORDX4
337 ; SI: BUFFER_LOAD_DWORDX4
338 ; SI: BUFFER_STORE_DWORDX4
339 ; SI: S_ENDPGM
340 define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
341   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; i8 view of the destination, as the intrinsic takes i8 pointers
342   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* ; i8 view of the source
343   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 8, i1 false) nounwind ; 32-byte copy, alignment operand 8
344   ret void
345 }
346
347 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align16
348 ; SI: BUFFER_LOAD_DWORDX4
349 ; SI: BUFFER_STORE_DWORDX4
350 ; SI: BUFFER_LOAD_DWORDX4
351 ; SI: BUFFER_STORE_DWORDX4
352 ; SI: S_ENDPGM
353 define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
354   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; i8 view of the destination, as the intrinsic takes i8 pointers
355   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* ; i8 view of the source
356   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 16, i1 false) nounwind ; 32-byte copy, alignment operand 16
357   ret void
358 }