1 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
4 ; FUNC-LABEL: @test_udivrem
29 ; SI: V_RCP_IFLAG_F32_e32 [[RCP:v[0-9]+]]
30 ; SI-DAG: V_MUL_HI_U32 [[RCP_HI:v[0-9]+]], [[RCP]]
31 ; SI-DAG: V_MUL_LO_I32 [[RCP_LO:v[0-9]+]], [[RCP]]
32 ; SI-DAG: V_SUB_I32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
33 ; SI: V_CNDMASK_B32_e64
34 ; SI: V_MUL_HI_U32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
35 ; SI-DAG: V_ADD_I32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
36 ; SI-DAG: V_SUBREV_I32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
37 ; SI: V_CNDMASK_B32_e64
38 ; SI: V_MUL_HI_U32 [[Quotient:v[0-9]+]]
39 ; SI: V_MUL_LO_I32 [[Num_S_Remainder:v[0-9]+]]
40 ; SI-DAG: V_SUB_I32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
41 ; SI-DAG: V_CNDMASK_B32_e64
42 ; SI-DAG: V_CNDMASK_B32_e64
43 ; SI: V_AND_B32_e32 [[Tmp1:v[0-9]+]]
44 ; SI-DAG: V_ADD_I32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
45 ; SI-DAG: V_SUBREV_I32_e32 [[Quotient_S_One:v[0-9]+]],
46 ; SI-DAG: V_CNDMASK_B32_e64
47 ; SI-DAG: V_CNDMASK_B32_e64
48 ; SI-DAG: V_ADD_I32_e32 [[Remainder_A_Den:v[0-9]+]],
49 ; SI-DAG: V_SUBREV_I32_e32 [[Remainder_S_Den:v[0-9]+]],
50 ; SI-DAG: V_CNDMASK_B32_e64
51 ; SI-DAG: V_CNDMASK_B32_e64
; Scalar i32 case: computes both the unsigned quotient (udiv) and the unsigned
; remainder (urem) of the same operand pair (%x, %y) and stores each result
; through the addrspace(1) pointer %out.
; NOTE(review): both stores write to the same address %out, so the remainder
; store overwrites the quotient store. The checks above still expect the
; quotient-side instructions -- confirm the first store is not removed as dead.
53 define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; Unsigned quotient of %x by %y.
54 %result0 = udiv i32 %x, %y
55 store i32 %result0, i32 addrspace(1)* %out
; Unsigned remainder of the same division; stored to the same %out location.
56 %result1 = urem i32 %x, %y
57 store i32 %result1, i32 addrspace(1)* %out
61 ; FUNC-LABEL: @test_udivrem_v2
109 ; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
110 ; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
111 ; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
112 ; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
113 ; SI-DAG: V_CNDMASK_B32_e64
114 ; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
115 ; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
116 ; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
117 ; SI-DAG: V_CNDMASK_B32_e64
118 ; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
119 ; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
120 ; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
121 ; SI-DAG: V_CNDMASK_B32_e64
122 ; SI-DAG: V_CNDMASK_B32_e64
123 ; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
124 ; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
125 ; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
126 ; SI-DAG: V_CNDMASK_B32_e64
127 ; SI-DAG: V_CNDMASK_B32_e64
128 ; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
129 ; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
130 ; SI-DAG: V_CNDMASK_B32_e64
131 ; SI-DAG: V_CNDMASK_B32_e64
132 ; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
133 ; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
134 ; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
135 ; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
136 ; SI-DAG: V_CNDMASK_B32_e64
137 ; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
138 ; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
139 ; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
140 ; SI-DAG: V_CNDMASK_B32_e64
141 ; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
142 ; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
143 ; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
144 ; SI-DAG: V_CNDMASK_B32_e64
145 ; SI-DAG: V_CNDMASK_B32_e64
146 ; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
147 ; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
148 ; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
149 ; SI-DAG: V_CNDMASK_B32_e64
150 ; SI-DAG: V_CNDMASK_B32_e64
151 ; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
152 ; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
153 ; SI-DAG: V_CNDMASK_B32_e64
154 ; SI-DAG: V_CNDMASK_B32_e64
; <2 x i32> vector case: applies udiv and urem to the same pair of two-element
; vectors (%x, %y) and stores each vector result through the addrspace(1)
; pointer %out. The checks above name two expansions (FIRST_* and SECOND_*).
; NOTE(review): both stores write to the same address %out, so the remainder
; vector overwrites the quotient vector -- confirm the first store is not
; removed as dead.
156 define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
; Unsigned quotient of %x by %y (vector operands).
157 %result0 = udiv <2 x i32> %x, %y
158 store <2 x i32> %result0, <2 x i32> addrspace(1)* %out
; Unsigned remainder of the same operands; stored to the same %out location.
159 %result1 = urem <2 x i32> %x, %y
160 store <2 x i32> %result1, <2 x i32> addrspace(1)* %out
165 ; FUNC-LABEL: @test_udivrem_v4
259 ; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
260 ; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
261 ; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
262 ; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
263 ; SI-DAG: V_CNDMASK_B32_e64
264 ; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
265 ; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
266 ; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
267 ; SI-DAG: V_CNDMASK_B32_e64
268 ; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
269 ; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
270 ; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
271 ; SI-DAG: V_CNDMASK_B32_e64
272 ; SI-DAG: V_CNDMASK_B32_e64
273 ; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
274 ; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
275 ; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
276 ; SI-DAG: V_CNDMASK_B32_e64
277 ; SI-DAG: V_CNDMASK_B32_e64
278 ; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
279 ; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
280 ; SI-DAG: V_CNDMASK_B32_e64
281 ; SI-DAG: V_CNDMASK_B32_e64
282 ; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
283 ; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
284 ; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
285 ; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
286 ; SI-DAG: V_CNDMASK_B32_e64
287 ; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
288 ; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
289 ; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
290 ; SI-DAG: V_CNDMASK_B32_e64
291 ; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
292 ; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
293 ; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
294 ; SI-DAG: V_CNDMASK_B32_e64
295 ; SI-DAG: V_CNDMASK_B32_e64
296 ; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
297 ; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
298 ; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
299 ; SI-DAG: V_CNDMASK_B32_e64
300 ; SI-DAG: V_CNDMASK_B32_e64
301 ; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
302 ; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
303 ; SI-DAG: V_CNDMASK_B32_e64
304 ; SI-DAG: V_CNDMASK_B32_e64
305 ; SI-DAG: V_RCP_IFLAG_F32_e32 [[THIRD_RCP:v[0-9]+]]
306 ; SI-DAG: V_MUL_HI_U32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
307 ; SI-DAG: V_MUL_LO_I32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
308 ; SI-DAG: V_SUB_I32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
309 ; SI-DAG: V_CNDMASK_B32_e64
310 ; SI-DAG: V_MUL_HI_U32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
311 ; SI-DAG: V_ADD_I32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
312 ; SI-DAG: V_SUBREV_I32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
313 ; SI-DAG: V_CNDMASK_B32_e64
314 ; SI-DAG: V_MUL_HI_U32 [[THIRD_Quotient:v[0-9]+]]
315 ; SI-DAG: V_MUL_LO_I32 [[THIRD_Num_S_Remainder:v[0-9]+]]
316 ; SI-DAG: V_SUB_I32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
317 ; SI-DAG: V_CNDMASK_B32_e64
318 ; SI-DAG: V_CNDMASK_B32_e64
319 ; SI-DAG: V_AND_B32_e32 [[THIRD_Tmp1:v[0-9]+]]
320 ; SI-DAG: V_ADD_I32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
321 ; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
322 ; SI-DAG: V_CNDMASK_B32_e64
323 ; SI-DAG: V_CNDMASK_B32_e64
324 ; SI-DAG: V_ADD_I32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
325 ; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
326 ; SI-DAG: V_CNDMASK_B32_e64
327 ; SI-DAG: V_CNDMASK_B32_e64
328 ; SI-DAG: V_RCP_IFLAG_F32_e32 [[FOURTH_RCP:v[0-9]+]]
329 ; SI-DAG: V_MUL_HI_U32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
330 ; SI-DAG: V_MUL_LO_I32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
331 ; SI-DAG: V_SUB_I32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
332 ; SI-DAG: V_CNDMASK_B32_e64
333 ; SI-DAG: V_MUL_HI_U32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
334 ; SI-DAG: V_ADD_I32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
335 ; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
336 ; SI-DAG: V_CNDMASK_B32_e64
337 ; SI-DAG: V_MUL_HI_U32 [[FOURTH_Quotient:v[0-9]+]]
338 ; SI-DAG: V_MUL_LO_I32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
339 ; SI-DAG: V_SUB_I32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
340 ; SI-DAG: V_CNDMASK_B32_e64
341 ; SI-DAG: V_CNDMASK_B32_e64
342 ; SI-DAG: V_AND_B32_e32 [[FOURTH_Tmp1:v[0-9]+]]
343 ; SI-DAG: V_ADD_I32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
344 ; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
345 ; SI-DAG: V_CNDMASK_B32_e64
346 ; SI-DAG: V_CNDMASK_B32_e64
347 ; SI-DAG: V_ADD_I32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
348 ; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
349 ; SI-DAG: V_CNDMASK_B32_e64
350 ; SI-DAG: V_CNDMASK_B32_e64
352 define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
353 %result0 = udiv <4 x i32> %x, %y
354 store <4 x i32> %result0, <4 x i32> addrspace(1)* %out
355 %result1 = urem <4 x i32> %x, %y
356 store <4 x i32> %result1, <4 x i32> addrspace(1)* %out