1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Declaration of the signed bit-field-extract intrinsic exercised by the tests
; in this file. Judging by the constant-fold expectations further down, the
; operands are (source, bit offset, field width) and the extracted field is
; sign-extended to 32 bits.
5 declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; All intrinsic operands come from kernel arguments. The call passes %src1 as
; both the offset and the width operand.
; NOTE(review): %src2 is declared but never used - confirm whether the last
; operand was meant to be %src2. The byte-level encoding check below may depend
; on the current operand choice, so verify before changing it.
7 ; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
10 ; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
11 define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
12 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
13 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Source and offset are kernel arguments; the width is the immediate 123.
17 ; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
20 define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
21 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
22 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Source and width are kernel arguments; the offset is the immediate 123.
26 ; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
29 define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
30 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
31 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; The source is the immediate 123; offset and width are kernel arguments.
35 ; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
38 define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
39 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
40 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Loads the source from memory and extracts with immediate offset 2 and width 8.
; The check verifies that both immediates are printed, in that order, as the
; last two operands of v_bfe_i32.
44 ; FUNC-LABEL: {{^}}v_bfe_print_arg:
45 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
46 define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
47 %load = load i32 addrspace(1)* %src0, align 4
48 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
49 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Zero-width extract whose offset is a kernel argument (%src1). The result is
; named %bfe_i32, like the other tests of this signed intrinsic.
53 ; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
57 define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
58 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
59 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Zero-width extract at the immediate offset 8. %src1 is accepted by the kernel
; but not used by the call. The result is named %bfe_i32, like the other tests
; of this signed intrinsic.
63 ; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
67 define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
68 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
69 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Extracts a 31-bit field starting at bit 1 of %shl. The checks expect this to
; be emitted as a left shift by 31 followed by an arithmetic right shift by 1.
73 ; FUNC-LABEL: {{^}}bfe_i32_test_6:
74 ; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
75 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
77 define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
78 %x = load i32 addrspace(1)* %in, align 4
80 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
81 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Extracts a 31-bit field starting at bit 0 of %shl. The checks expect the whole
; computation to fold away, so that a literal 0 is moved into a register and
; that register is stored.
85 ; FUNC-LABEL: {{^}}bfe_i32_test_7:
88 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
89 ; SI: buffer_store_dword [[VREG]],
91 define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
92 %x = load i32 addrspace(1)* %in, align 4
94 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
95 store i32 %bfe, i32 addrspace(1)* %out, align 4
99 ; FIXME: The shifts should be 1 BFE
; NOTE(review): the check below already expects a single v_bfe_i32, so the
; FIXME above may be stale or may belong to a different test - confirm.
; Takes the 1-bit field at offset 31 of (%x << 31), which is bit 0 of %x; the
; expected instruction therefore uses offset 0 and width 1.
100 ; FUNC-LABEL: {{^}}bfe_i32_test_8:
101 ; SI: buffer_load_dword
102 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
104 define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
105 %x = load i32 addrspace(1)* %in, align 4
106 %shl = shl i32 %x, 31
107 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
108 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 1-bit field at offset 31 of the loaded value. Offset plus width is 32, so the
; field ends at the top bit; the checks expect a plain arithmetic right shift by
; 31 and no bfe instruction. The negative pattern matches "bfe" only when it is
; preceded by a character other than '@' (presumably so the function's own name
; does not trigger it).
112 ; FUNC-LABEL: {{^}}bfe_i32_test_9:
113 ; SI-NOT: {{[^@]}}bfe
114 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
115 ; SI-NOT: {{[^@]}}bfe
117 define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
118 %x = load i32 addrspace(1)* %in, align 4
119 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
120 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 31-bit field at offset 1 of the loaded value (offset plus width is 32). The
; checks expect an arithmetic right shift by 1 and no bfe instruction.
124 ; FUNC-LABEL: {{^}}bfe_i32_test_10:
125 ; SI-NOT: {{[^@]}}bfe
126 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
127 ; SI-NOT: {{[^@]}}bfe
129 define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
130 %x = load i32 addrspace(1)* %in, align 4
131 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
132 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 24-bit field at offset 8 of the loaded value (offset plus width is 32). The
; checks expect an arithmetic right shift by 8 and no bfe instruction.
136 ; FUNC-LABEL: {{^}}bfe_i32_test_11:
137 ; SI-NOT: {{[^@]}}bfe
138 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
139 ; SI-NOT: {{[^@]}}bfe
141 define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
142 %x = load i32 addrspace(1)* %in, align 4
143 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
144 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 8-bit field at offset 24 of the loaded value (offset plus width is 32). The
; checks expect an arithmetic right shift by 24 and no bfe instruction.
148 ; FUNC-LABEL: {{^}}bfe_i32_test_12:
149 ; SI-NOT: {{[^@]}}bfe
150 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
151 ; SI-NOT: {{[^@]}}bfe
153 define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
154 %x = load i32 addrspace(1)* %in, align 4
155 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
156 store i32 %bfe, i32 addrspace(1)* %out, align 4
; The source is already an arithmetic right shift by 31 (held in %shl despite
; the name). Taking its top bit as a signed 1-bit field is expected to leave
; just one arithmetic shift by 31 and no bfe instruction afterwards.
160 ; FUNC-LABEL: {{^}}bfe_i32_test_13:
161 ; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
162 ; SI-NOT: {{[^@]}}bfe
164 define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
165 %x = load i32 addrspace(1)* %in, align 4
166 %shl = ashr i32 %x, 31
167 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
168 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
; The source is a logical right shift by 31 (held in %shl despite the name), so
; bit 31 of it is always clear. The visible check only requires that no bfe
; instruction is emitted.
171 ; FUNC-LABEL: {{^}}bfe_i32_test_14:
173 ; SI-NOT: {{[^@]}}bfe
175 define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
176 %x = load i32 addrspace(1)* %in, align 4
177 %shl = lshr i32 %x, 31
178 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
179 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
; Constant fold: source 0, offset 0, width 0. Expected to fold to the constant
; 0 with no bfe instruction.
182 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
183 ; SI-NOT: {{[^@]}}bfe
184 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
185 ; SI: buffer_store_dword [[VREG]],
188 define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
189 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
190 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: non-zero source 12334 with offset 0 and width 0. A zero-width
; field is expected to fold to 0.
194 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
195 ; SI-NOT: {{[^@]}}bfe
196 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
197 ; SI: buffer_store_dword [[VREG]],
200 define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
201 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
202 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 1-bit field at offset 0 of the source 0. The bit is clear, so
; the expected result is 0.
206 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
207 ; SI-NOT: {{[^@]}}bfe
208 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
209 ; SI: buffer_store_dword [[VREG]],
212 define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
213 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
214 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 1-bit field at offset 0 of the source 1. The set bit is the
; field's sign bit, so it sign-extends to -1.
218 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
219 ; SI-NOT: {{[^@]}}bfe
220 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
221 ; SI: buffer_store_dword [[VREG]],
224 define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
225 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
226 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 1-bit field at offset 0 of 4294967295 (all 32 bits set).
; Expected result is -1.
230 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
231 ; SI-NOT: {{[^@]}}bfe
232 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
233 ; SI: buffer_store_dword [[VREG]],
236 define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
237 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
238 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 1-bit field at offset 7 of 128 (0x80). Bit 7 is set, so the
; expected result is -1.
242 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
243 ; SI-NOT: {{[^@]}}bfe
244 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
245 ; SI: buffer_store_dword [[VREG]],
248 define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
249 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
250 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 8-bit field at offset 0 of 128 (0x80). The field's top bit is
; set, so it sign-extends to 0xffffff80.
254 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
255 ; SI-NOT: {{[^@]}}bfe
256 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
257 ; SI: buffer_store_dword [[VREG]],
260 define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
261 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
262 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 8-bit field at offset 0 of 127 (0x7f). The field's top bit is
; clear, so the expected result stays 0x7f.
266 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
267 ; SI-NOT: {{[^@]}}bfe
268 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
269 ; SI: buffer_store_dword [[VREG]],
272 define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
273 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
274 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 8-bit field at offset 6 of 127. 127 >> 6 is 1, so the expected
; result is 1.
278 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
279 ; SI-NOT: {{[^@]}}bfe
280 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
281 ; SI: buffer_store_dword [[VREG]],
284 define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
285 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
286 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 8-bit field at offset 16 of 65536 (1 << 16). Expected result
; is 1.
290 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
291 ; SI-NOT: {{[^@]}}bfe
292 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
293 ; SI: buffer_store_dword [[VREG]],
296 define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
297 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
298 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 16-bit field at offset 16 of 65535 (0xffff). The upper half of
; the source is zero, so the expected result is 0.
302 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
303 ; SI-NOT: {{[^@]}}bfe
304 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
305 ; SI: buffer_store_dword [[VREG]],
308 define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
309 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
310 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 4-bit field at offset 4 of 160 (0xa0). The field is 0b1010,
; which sign-extends to -6.
314 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
315 ; SI-NOT: {{[^@]}}bfe
316 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
317 ; SI: buffer_store_dword [[VREG]],
320 define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
321 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
322 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 1-bit field at offset 31 of 160. Bit 31 is clear, so the
; expected result is 0.
326 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
327 ; SI-NOT: {{[^@]}}bfe
328 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
329 ; SI: buffer_store_dword [[VREG]],
332 define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
333 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
334 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 16-bit field at offset 16 of 131070 (0x1fffe). Only bit 16 of
; the upper half is set, so the expected result is 1.
338 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
339 ; SI-NOT: {{[^@]}}bfe
340 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
341 ; SI: buffer_store_dword [[VREG]],
344 define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
345 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
346 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 30-bit field at offset 2 of 160. 160 >> 2 is 40 and the
; field's top bit is clear, so the expected result is 40.
350 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
351 ; SI-NOT: {{[^@]}}bfe
352 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
353 ; SI: buffer_store_dword [[VREG]],
356 define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
357 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
358 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 28-bit field at offset 4 of 160. 160 >> 4 is 10 and the
; field's top bit is clear, so the expected result is 10.
362 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
363 ; SI-NOT: {{[^@]}}bfe
364 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
365 ; SI: buffer_store_dword [[VREG]],
368 define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
369 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
370 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 7-bit field at offset 1 of 4294967295 (all 32 bits set). Every
; bit of the field is set, so it sign-extends to -1.
374 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
375 ; SI-NOT: {{[^@]}}bfe
376 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
377 ; SI: buffer_store_dword [[VREG]],
380 define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
381 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
382 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 31-bit field at offset 1 of 255. 255 >> 1 is 127 and the
; field's top bit is clear, so the expected result is 0x7f.
386 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
387 ; SI-NOT: {{[^@]}}bfe
388 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
389 ; SI: buffer_store_dword [[VREG]],
392 define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
393 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
394 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Constant fold: 1-bit field at offset 31 of 255. Bit 31 is clear, so the
; expected result is 0.
398 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
399 ; SI-NOT: {{[^@]}}bfe
400 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
401 ; SI: buffer_store_dword [[VREG]],
404 define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
405 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
406 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
410 ; XXX - This should really be a single BFE, but the sext_inreg of the
411 ; extended type i24 is never custom lowered.
; The IR extracts a signed 24-bit field at offset 0 and then sign-extends from
; 24 bits a second time via shl 8 / ashr 8. The active checks pin the shift pair
; that is currently emitted. The disabled XSI lines record the desired
; single-instruction form; its width operand is 24 to match the 24-bit field
; the IR actually computes.
412 ; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
413 ; SI: buffer_load_dword [[LOAD:v[0-9]+]],
414 ; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
415 ; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
416 ; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
419 ; XSI: buffer_store_dword [[BFE]],
420 define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
421 %x = load i32 addrspace(1)* %in, align 4
422 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
423 %shl = shl i32 %bfe, 8
424 %ashr = ashr i32 %shl, 8
425 store i32 %ashr, i32 addrspace(1)* %out, align 4
; Signed 16-bit field at offset 1 of the loaded value, divided by 2. The checks
; expect the bfe to be kept, followed by the usual signed-division-by-2
; sequence: logical shift right by 31 to isolate the sign bit, add it back in,
; then arithmetic shift right by 1. The label uses the same anchored
; "name followed by colon" form as the other tests in this file.
429 ; FUNC-LABEL: {{^}}simplify_demanded_bfe_sdiv:
430 ; SI: buffer_load_dword [[LOAD:v[0-9]+]]
431 ; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
432 ; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
433 ; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
434 ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
435 ; SI: buffer_store_dword [[TMP2]]
436 define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
437 %src = load i32 addrspace(1)* %in, align 4
438 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
439 %div = sdiv i32 %bfe, 2
440 store i32 %div, i32 addrspace(1)* %out, align 4