1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
5 declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
; NOTE(review): this file is an LLVM IR FileCheck test for the
; llvm.AMDGPU.bfe.u32 intrinsic (see the RUN lines / declare at the top),
; not native assembler. The leading digits on every line are stale line
; numbers from the original file; gaps in them show lines were lost —
; in particular each function below is missing its 'ret void' and closing
; '}'. Restore the terminators before feeding this to llc.
; --- Operand-form tests: BFE with register/immediate operand mixes. ---
7 ; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
10 define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
; NOTE(review): %src2 is declared but never used; the third bfe operand
; repeats %src1 — possibly intentional, verify against upstream before
; "fixing".
11 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
12 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
16 ; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
19 define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
20 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
21 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
25 ; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
28 define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
29 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
30 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
34 ; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
37 define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
38 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
39 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
; The next two tests pass 0 as the last (width) operand — see the
; "0_width" function names. Their CHECK lines were lost in extraction.
43 ; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
47 define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
48 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
49 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
53 ; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
57 define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
58 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
59 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
; --- BFE applied to zero-extended / and-masked sources. ---
; NOTE(review): each 'define' below is missing its 'ret void' and closing
; '}' (the stale per-line numbering jumps past them); restore before use.
63 ; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
64 ; SI: buffer_load_ubyte
67 define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
68 %load = load i8, i8 addrspace(1)* %in
69 %ext = zext i8 %load to i32
70 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
71 store i32 %bfe, i32 addrspace(1)* %out, align 4
75 ; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
76 ; SI: buffer_load_dword
78 ; SI-NEXT: v_and_b32_e32
81 define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
82 %load = load i32, i32 addrspace(1)* %in, align 4
; The add keeps the 'and' from being folded away before the bfe is formed.
83 %add = add i32 %load, 1
84 %ext = and i32 %add, 255
85 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
86 store i32 %bfe, i32 addrspace(1)* %out, align 4
90 ; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
91 ; SI: buffer_load_dword
93 ; SI-NEXT: v_and_b32_e32
96 define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
97 %load = load i32, i32 addrspace(1)* %in, align 4
98 %add = add i32 %load, 1
99 %ext = and i32 %add, 65535
100 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
101 store i32 %bfe, i32 addrspace(1)* %out, align 4
105 ; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
106 ; SI: buffer_load_dword
110 define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
111 %load = load i32, i32 addrspace(1)* %in, align 4
112 %add = add i32 %load, 1
113 %ext = and i32 %add, 255
114 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
115 store i32 %bfe, i32 addrspace(1)* %out, align 4
119 ; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
120 ; SI: buffer_load_dword
122 ; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
125 define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
126 %load = load i32, i32 addrspace(1)* %in, align 4
127 %add = add i32 %load, 1
128 %ext = and i32 %add, 255
129 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
130 store i32 %bfe, i32 addrspace(1)* %out, align 4
134 ; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
135 ; SI: buffer_load_dword
137 ; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
140 define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
141 %load = load i32, i32 addrspace(1)* %in, align 4
142 %add = add i32 %load, 1
143 %ext = and i32 %add, 255
144 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
145 store i32 %bfe, i32 addrspace(1)* %out, align 4
149 ; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
150 ; SI: buffer_load_dword
154 define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
155 %load = load i32, i32 addrspace(1)* %in, align 4
156 %add = add i32 %load, 1
157 %ext = and i32 %add, 65535
158 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
159 store i32 %bfe, i32 addrspace(1)* %out, align 4
; --- bfe_u32_test_1..14: shift + BFE combinations the backend should
; simplify. ---
; NOTE(review): as elsewhere in this file, most 'define's below are missing
; their 'ret void' / '}' lines (stale numbering jumps past them).
163 ; FUNC-LABEL: {{^}}bfe_u32_test_1:
164 ; SI: buffer_load_dword
165 ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
167 ; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
168 define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
169 %x = load i32, i32 addrspace(1)* %in, align 4
170 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
171 store i32 %bfe, i32 addrspace(1)* %out, align 4
; NOTE(review): test_2 and test_3 lost their FUNC-LABEL/CHECK lines.
175 define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
176 %x = load i32, i32 addrspace(1)* %in, align 4
177 %shl = shl i32 %x, 31
178 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
179 store i32 %bfe, i32 addrspace(1)* %out, align 4
183 define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
184 %x = load i32, i32 addrspace(1)* %in, align 4
185 %shl = shl i32 %x, 31
186 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
187 store i32 %bfe, i32 addrspace(1)* %out, align 4
191 ; FUNC-LABEL: {{^}}bfe_u32_test_4:
194 ; SI-NOT: {{[^@]}}bfe
195 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
196 ; SI: buffer_store_dword [[VREG]],
198 define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
199 %x = load i32, i32 addrspace(1)* %in, align 4
200 %shl = shl i32 %x, 31
201 %shr = lshr i32 %shl, 31
; Extracting bit 31 of a value known to be 0 or 1 must fold to 0.
202 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
203 store i32 %bfe, i32 addrspace(1)* %out, align 4
207 ; FUNC-LABEL: {{^}}bfe_u32_test_5:
208 ; SI: buffer_load_dword
211 ; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
213 define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
214 %x = load i32, i32 addrspace(1)* %in, align 4
215 %shl = shl i32 %x, 31
216 %shr = ashr i32 %shl, 31
217 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
218 store i32 %bfe, i32 addrspace(1)* %out, align 4
222 ; FUNC-LABEL: {{^}}bfe_u32_test_6:
223 ; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
224 ; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
226 define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
227 %x = load i32, i32 addrspace(1)* %in, align 4
228 %shl = shl i32 %x, 31
229 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
230 store i32 %bfe, i32 addrspace(1)* %out, align 4
234 ; FUNC-LABEL: {{^}}bfe_u32_test_7:
235 ; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
236 ; SI-NOT: {{[^@]}}bfe
238 define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
239 %x = load i32, i32 addrspace(1)* %in, align 4
240 %shl = shl i32 %x, 31
241 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
242 store i32 %bfe, i32 addrspace(1)* %out, align 4
246 ; FUNC-LABEL: {{^}}bfe_u32_test_8:
247 ; SI-NOT: {{[^@]}}bfe
248 ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
249 ; SI-NOT: {{[^@]}}bfe
251 define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
252 %x = load i32, i32 addrspace(1)* %in, align 4
253 %shl = shl i32 %x, 31
254 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
255 store i32 %bfe, i32 addrspace(1)* %out, align 4
259 ; FUNC-LABEL: {{^}}bfe_u32_test_9:
260 ; SI-NOT: {{[^@]}}bfe
261 ; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
262 ; SI-NOT: {{[^@]}}bfe
264 define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
265 %x = load i32, i32 addrspace(1)* %in, align 4
266 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
267 store i32 %bfe, i32 addrspace(1)* %out, align 4
271 ; FUNC-LABEL: {{^}}bfe_u32_test_10:
272 ; SI-NOT: {{[^@]}}bfe
273 ; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
274 ; SI-NOT: {{[^@]}}bfe
276 define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
277 %x = load i32, i32 addrspace(1)* %in, align 4
278 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
279 store i32 %bfe, i32 addrspace(1)* %out, align 4
283 ; FUNC-LABEL: {{^}}bfe_u32_test_11:
284 ; SI-NOT: {{[^@]}}bfe
285 ; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
286 ; SI-NOT: {{[^@]}}bfe
288 define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
289 %x = load i32, i32 addrspace(1)* %in, align 4
290 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
291 store i32 %bfe, i32 addrspace(1)* %out, align 4
295 ; FUNC-LABEL: {{^}}bfe_u32_test_12:
296 ; SI-NOT: {{[^@]}}bfe
297 ; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
298 ; SI-NOT: {{[^@]}}bfe
300 define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
301 %x = load i32, i32 addrspace(1)* %in, align 4
302 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
303 store i32 %bfe, i32 addrspace(1)* %out, align 4
307 ; FUNC-LABEL: {{^}}bfe_u32_test_13:
; NOTE(review): the next line has no 'SI:'/'EG:' check prefix, so FileCheck
; silently ignores it — it checks nothing as written. Verify whether it
; should read '; SI: v_ashrrev_i32_e32 ...' before relying on this test.
308 ; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
309 ; SI-NOT: {{[^@]}}bfe
311 define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
312 %x = load i32, i32 addrspace(1)* %in, align 4
313 %shl = ashr i32 %x, 31
314 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
; NOTE(review): 'ret void' has been joined onto the store line below (a
; line-merge artifact) and the closing '}' is missing — split/restore them;
; LLVM IR instructions must be newline-separated.
315 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
318 ; FUNC-LABEL: {{^}}bfe_u32_test_14:
320 ; SI-NOT: {{[^@]}}bfe
322 define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
323 %x = load i32, i32 addrspace(1)* %in, align 4
324 %shl = lshr i32 %x, 31
325 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
; NOTE(review): same line-merge artifact as test_13 — 'ret void' fused onto
; the store line; closing '}' missing.
326 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
; --- Constant-folding tests: bfe.u32 with all-constant operands must fold
; to an immediate mov + store (no bfe instruction emitted). ---
; NOTE(review): every 'define' below is missing its 'ret void' / '}'
; (stale numbering gaps); restore before running.
329 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
330 ; SI-NOT: {{[^@]}}bfe
331 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
332 ; SI: buffer_store_dword [[VREG]],
335 define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
336 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
337 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
341 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
342 ; SI-NOT: {{[^@]}}bfe
343 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
344 ; SI: buffer_store_dword [[VREG]],
347 define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
348 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
349 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
353 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
354 ; SI-NOT: {{[^@]}}bfe
355 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
356 ; SI: buffer_store_dword [[VREG]],
359 define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
360 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
361 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
365 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
366 ; SI-NOT: {{[^@]}}bfe
367 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
368 ; SI: buffer_store_dword [[VREG]],
371 define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
372 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
373 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
377 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
378 ; SI-NOT: {{[^@]}}bfe
; NOTE(review): expecting -1 for a 1-bit *unsigned* extract of 0xffffffff
; looks suspicious (arithmetically the result is 1) — verify this check
; against the current constant folder before trusting it.
379 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
380 ; SI: buffer_store_dword [[VREG]],
383 define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
384 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
385 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
389 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
390 ; SI-NOT: {{[^@]}}bfe
391 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
392 ; SI: buffer_store_dword [[VREG]],
395 define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
396 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
397 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
401 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
402 ; SI-NOT: {{[^@]}}bfe
403 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
404 ; SI: buffer_store_dword [[VREG]],
407 define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
408 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
409 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
413 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
414 ; SI-NOT: {{[^@]}}bfe
415 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
416 ; SI: buffer_store_dword [[VREG]],
419 define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
420 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
421 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
425 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
426 ; SI-NOT: {{[^@]}}bfe
427 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
428 ; SI: buffer_store_dword [[VREG]],
431 define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
432 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
433 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
437 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
438 ; SI-NOT: {{[^@]}}bfe
439 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
440 ; SI: buffer_store_dword [[VREG]],
443 define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
444 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
445 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
449 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
450 ; SI-NOT: {{[^@]}}bfe
451 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
452 ; SI: buffer_store_dword [[VREG]],
455 define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
456 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
457 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
461 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
462 ; SI-NOT: {{[^@]}}bfe
463 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
464 ; SI: buffer_store_dword [[VREG]],
467 define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
468 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
469 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
473 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
474 ; SI-NOT: {{[^@]}}bfe
475 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
476 ; SI: buffer_store_dword [[VREG]],
479 define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
480 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
481 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
485 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
486 ; SI-NOT: {{[^@]}}bfe
487 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
488 ; SI: buffer_store_dword [[VREG]],
491 define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
492 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
493 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
497 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
498 ; SI-NOT: {{[^@]}}bfe
499 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
500 ; SI: buffer_store_dword [[VREG]],
503 define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
504 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
505 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
509 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
510 ; SI-NOT: {{[^@]}}bfe
511 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
512 ; SI: buffer_store_dword [[VREG]],
515 define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
516 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
517 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
521 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
522 ; SI-NOT: {{[^@]}}bfe
523 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
524 ; SI: buffer_store_dword [[VREG]],
527 define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
528 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
529 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
533 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
534 ; SI-NOT: {{[^@]}}bfe
535 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
536 ; SI: buffer_store_dword [[VREG]],
539 define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
540 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
541 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
545 ; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
546 ; SI-NOT: {{[^@]}}bfe
547 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
548 ; SI: buffer_store_dword [[VREG]],
551 define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
552 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
553 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
557 ; Make sure that SimplifyDemandedBits doesn't cause the and to be
558 ; reduced to the bits demanded by the bfe.
560 ; XXX: The operand to v_bfe_u32 could also just directly be the load register.
561 ; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
562 ; SI: buffer_load_dword [[ARG:v[0-9]+]]
563 ; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
564 ; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
565 ; SI-DAG: buffer_store_dword [[AND]]
566 ; SI-DAG: buffer_store_dword [[BFE]]
568 define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
569 i32 addrspace(1)* %out1,
570 i32 addrspace(1)* %in) nounwind {
571 %src = load i32, i32 addrspace(1)* %in, align 4
; %and is stored to %out1 below as well as feeding the bfe, so the full
; 63-mask must survive (the multi-use case the header comment describes).
572 %and = and i32 %src, 63
573 %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
574 store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
575 store i32 %and, i32 addrspace(1)* %out1, align 4
; NOTE(review): 'ret void' and the closing '}' are missing here (stale
; line numbering jumps from 575 to 579); restore before use.
; --- Pattern-matching tests: shift/and combinations that should select
; s_bfe_u32 / v_bfe_u32 directly. ---
; NOTE(review): the bodies of all five functions below are incomplete —
; the instructions defining %c / %d (stale lines 583-584, 593-594,
; 603-604, 613-614, 623-624) plus each 'ret void' and '}' were lost in
; extraction. As written, %c/%d are undefined; recover the original
; lshr/and/shl computations before using this file.
579 ; FUNC-LABEL: {{^}}lshr_and:
580 ; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
581 ; SI: buffer_store_dword
582 define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
585 store i32 %c, i32 addrspace(1)* %out, align 8
589 ; FUNC-LABEL: {{^}}v_lshr_and:
590 ; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
591 ; SI: buffer_store_dword
592 define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
595 store i32 %d, i32 addrspace(1)* %out, align 8
599 ; FUNC-LABEL: {{^}}and_lshr:
600 ; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
601 ; SI: buffer_store_dword
602 define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
605 store i32 %c, i32 addrspace(1)* %out, align 8
609 ; FUNC-LABEL: {{^}}and_lshr2:
610 ; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
611 ; SI: buffer_store_dword
612 define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
615 store i32 %c, i32 addrspace(1)* %out, align 8
619 ; FUNC-LABEL: {{^}}shl_lshr:
620 ; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
621 ; SI: buffer_store_dword
622 define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
625 store i32 %c, i32 addrspace(1)* %out, align 8