; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
; FUNC-LABEL: @sext_in_reg_i1_i32
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[ARG]], 0, 1
; SI: BUFFER_STORE_DWORD [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  ; shl 31 + ashr 31 sign-extends bit 0 across the whole i32 (sext_in_reg i1)
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: @sext_in_reg_i8_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  ; shl 24 + ashr 24 sign-extends the low byte (sext_in_reg i8)
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: @sext_in_reg_i16_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  ; shl 16 + ashr 16 sign-extends the low half-word (sext_in_reg i16)
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  ; single-element vector form of the i8 sext_in_reg pattern
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: @sext_in_reg_i8_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  ; shl 56 + ashr 56 sign-extends the low byte across the whole i64
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; FUNC-LABEL: @sext_in_reg_i16_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  ; shl 48 + ashr 48 sign-extends the low 16 bits across the whole i64
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; FUNC-LABEL: @sext_in_reg_i32_to_i64
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT {{\*?}} [[RES_LO]]
; EG: ASHR [[RES_HI]]
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  ; shl 32 + ashr 32 sign-extends the low 32 bits across the whole i64
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
; XSI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
; XSI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
; XSI: BUFFER_STORE_DWORD

; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }
; FUNC-LABEL: @sext_in_reg_i1_in_i32_other_amount
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  ; mismatched shift amounts (6 vs 7): not a plain sext_in_reg, must stay a shl+ashr pair
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: @sext_in_reg_v2i1_in_v2i32_other_amount
; SI: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  ; vector form of the mismatched-shift-amount case (6 vs 7)
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}
; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  ; per-element i1 sext_in_reg on a 2-element vector
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  ; per-element i1 sext_in_reg on a 4-element vector
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  ; per-element i8 sext_in_reg on a 2-element vector
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  ; per-element i8 sext_in_reg on a 4-element vector
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  ; per-element i16 sext_in_reg on a 2-element vector
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; FUNC-LABEL: @testcase
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  ; i8 select/xor pattern that exercises sext_in_reg combines on illegal types
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: @testcase_3
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  ; same pattern as @testcase; duplicated to cover a second combine path
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}
; FIXME: The BFE should really be eliminated. I think it should happen
; when computeMaskedBitsForTargetNode is implemented for imax.

; FUNC-LABEL: @sext_in_reg_to_illegal_type
; SI: BUFFER_LOAD_SBYTE
; SI: BUFFER_STORE_SHORT
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  ; imax(x, 0) is known non-negative, so the trailing sext should be foldable
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}