1 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
5 ; FUNC-LABEL: {{^}}udiv24_i8:
; Unsigned 8-bit division kernel: divides the i8 stored at %in[0] by the i8
; stored at %in[1] and writes the quotient to %out. Both operands are i8, so
; they are always narrower than 24 bits.
define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; The denominator is the byte immediately after the numerator.
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1) * %in
  %den = load i8, i8 addrspace(1) * %den_ptr
  %result = udiv i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
24 ; FUNC-LABEL: {{^}}udiv24_i16:
; Unsigned 16-bit division kernel: divides the i16 stored at %in[0] by the i16
; stored at %in[1] and writes the quotient to %out. Both operands are i16, so
; they are always narrower than 24 bits.
define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; The denominator is the i16 element immediately after the numerator.
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1) * %in, align 2
  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
  %result = udiv i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
43 ; FUNC-LABEL: {{^}}udiv24_i32:
45 ; SI-DAG: v_cvt_f32_u32
; Unsigned 32-bit division kernel whose operands are provably 24-bit values:
; each loaded i32 is shifted left by 8 and then logically right by 8, which
; clears the top 8 bits. The quotient of the masked values is stored to %out.
define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; shl 8 + lshr 8 zeroes bits 31..24 of both operands.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
66 ; FUNC-LABEL: {{^}}udiv25_i32:
; RCP_IFLAG is expected here because it implements URECIP in the full 32-bit division algorithm.
; Unsigned 32-bit division kernel whose operands keep 25 significant bits:
; each loaded i32 is shifted left by 7 and then logically right by 7, which
; clears only the top 7 bits. That is one bit wider than a 24-bit value.
; The quotient of the masked values is stored to %out.
define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; shl 7 + lshr 7 zeroes bits 31..25 of both operands.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
86 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; RCP_IFLAG is expected here because it implements URECIP in the full 32-bit division algorithm.
; Unsigned 32-bit division kernel with mismatched operand widths: the
; numerator is masked to 24 bits (shl 8 / lshr 8) but the denominator keeps
; 25 significant bits (shl 7 / lshr 7). The quotient is stored to %out.
define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: top 8 bits cleared. Denominator: only top 7 bits cleared.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
106 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; RCP_IFLAG is expected here because it implements URECIP in the full 32-bit division algorithm.
111 ; EG-NOT: UINT_TO_FLT
; Unsigned 32-bit division kernel with mismatched operand widths: the
; numerator keeps 25 significant bits (shl 7 / lshr 7) while the denominator
; is masked to 24 bits (shl 8 / lshr 8). The quotient is stored to %out.
; The r600 check attached to this test rejects any UINT_TO_FLT in the output.
define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: only top 7 bits cleared. Denominator: top 8 bits cleared.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
126 ; FUNC-LABEL: {{^}}urem24_i8:
127 ; SI: v_cvt_f32_ubyte
128 ; SI: v_cvt_f32_ubyte
133 ; EG-DAG: UINT_TO_FLT
; Unsigned 8-bit remainder kernel: computes the i8 stored at %in[0] modulo the
; i8 stored at %in[1] and writes the remainder to %out. Both operands are i8,
; so they are always narrower than 24 bits.
define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; The denominator is the byte immediately after the numerator.
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1) * %in
  %den = load i8, i8 addrspace(1) * %den_ptr
  %result = urem i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
145 ; FUNC-LABEL: {{^}}urem24_i16:
152 ; EG-DAG: UINT_TO_FLT
; Unsigned 16-bit remainder kernel: computes the i16 stored at %in[0] modulo
; the i16 stored at %in[1] and writes the remainder to %out. Both operands are
; i16, so they are always narrower than 24 bits.
define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; The denominator is the i16 element immediately after the numerator.
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1) * %in, align 2
  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
  %result = urem i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
164 ; FUNC-LABEL: {{^}}urem24_i32:
171 ; EG-DAG: UINT_TO_FLT
; Unsigned 32-bit remainder kernel whose operands are provably 24-bit values:
; each loaded i32 is shifted left by 8 and then logically right by 8, which
; clears the top 8 bits. The remainder of the masked values is stored to %out.
define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; shl 8 + lshr 8 zeroes bits 31..24 of both operands.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
187 ; FUNC-LABEL: {{^}}urem25_i32:
; RCP_IFLAG is expected here because it implements URECIP in the full 32-bit division algorithm.
192 ; EG-NOT: UINT_TO_FLT
; Unsigned 32-bit remainder kernel whose operands keep 25 significant bits:
; each loaded i32 is shifted left by 7 and then logically right by 7, which
; clears only the top 7 bits. That is one bit wider than a 24-bit value.
; The remainder of the masked values is stored to %out.
; The r600 check attached to this test rejects any UINT_TO_FLT in the output.
define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; shl 7 + lshr 7 zeroes bits 31..25 of both operands.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
207 ; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; RCP_IFLAG is expected here because it implements URECIP in the full 32-bit division algorithm.
212 ; EG-NOT: UINT_TO_FLT
; Unsigned 32-bit remainder kernel with mismatched operand widths: the
; numerator is masked to 24 bits (shl 8 / lshr 8) but the denominator keeps
; 25 significant bits (shl 7 / lshr 7). The remainder is stored to %out.
; The r600 check attached to this test rejects any UINT_TO_FLT in the output.
define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: top 8 bits cleared. Denominator: only top 7 bits cleared.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}
227 ; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; RCP_IFLAG is expected here because it implements URECIP in the full 32-bit division algorithm.
232 ; EG-NOT: UINT_TO_FLT
; Unsigned 32-bit remainder kernel with mismatched operand widths: the
; numerator keeps 25 significant bits (shl 7 / lshr 7) while the denominator
; is masked to 24 bits (shl 8 / lshr 8). The remainder is stored to %out.
; The r600 check attached to this test rejects any UINT_TO_FLT in the output.
define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The denominator is the i32 element immediately after the numerator.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: only top 7 bits cleared. Denominator: top 8 bits cleared.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ; A basic block must end in a terminator; the function returns void.
  ret void
}