; Test 16-bit conditional stores that are presented as selects. The volatile
; tests require z10, which uses a branch instead of a LOCR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

declare void @foo(i16 *)
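; For the plain (non-volatile, non-atomic) tests, keeping the loaded value
; makes the store a no-op, so the expected code is a compare, a conditional
; branch around the store, and a plain STH or STHY of the alternative value.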
; Test the simple case, with the loaded value first.
define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %alt, i16 %orig
  store i16 %res, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 32 bits, with the
; loaded value first.
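; The extension is truncated away again before the store, so f3 through f10
; should use the same branch-and-STH sequence as f1 and f2.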
define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 32 bits, with the
; loaded value first.
define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Check the high end of the aligned STH range.
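; STH has a 12-bit unsigned displacement, so 4094 is the largest
; halfword-aligned offset it can encode directly.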
define void @f11(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth %r3, 4094(%r2)
; CHECK: [[LABEL]]:
  %ptr = getelementptr i16 *%base, i64 2047
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which should use STHY instead of STH.
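; STHY is the long-displacement form, with a 20-bit signed displacement
; covering -524288 to 524287.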
define void @f12(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sthy %r3, 4096(%r2)
; CHECK: [[LABEL]]:
  %ptr = getelementptr i16 *%base, i64 2048
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the high end of the aligned STHY range.
define void @f13(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sthy %r3, 524286(%r2)
; CHECK: [[LABEL]]:
  %ptr = getelementptr i16 *%base, i64 262143
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
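; 524288 does not fit in a signed 20-bit displacement, so the base register
; has to be adjusted first (here with AGFI).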
define void @f14(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: agfi %r2, 524288
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %ptr = getelementptr i16 *%base, i64 262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the low end of the STHY range.
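; -524288 is the most negative displacement that STHY can encode.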
define void @f15(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sthy %r3, -524288(%r2)
; CHECK: [[LABEL]]:
  %ptr = getelementptr i16 *%base, i64 -262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f16(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: agfi %r2, -524290
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
  %ptr = getelementptr i16 *%base, i64 -262145
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check that STHY allows an index.
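; The extra 4096 puts the offset outside STH's displacement range, and the
; index register should appear directly in the STHY address.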
define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sthy %r4, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i16 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check that volatile loads are not matched.
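; A volatile load must be executed unconditionally, so the load is kept and
; only the stored value is selected with a branch.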
define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: lh {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-5]}}, 0(%r2)
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...likewise stores. In this case we should have a conditional load into %r3.
define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store volatile i16 %res, i16 *%ptr
  ret void
}

; Check that atomic loads are not matched. The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK: lh {{%r[0-9]+}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-9]+}}, 0(%r2)
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i16 *%ptr unordered, align 2
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...likewise stores.
define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store atomic i16 %res, i16 *%ptr unordered, align 2
  ret void
}

; Try a frame index base.
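; The halfword lives in a stack slot whose address escapes via @foo, so the
; conditional store should use an %r15-relative address.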
define void @f22(i16 %alt, i32 %limit) {
; CHECK: brasl %r14, foo@PLT
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
  %ptr = alloca i16
  call void @foo(i16 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  call void @foo(i16 *%ptr)
  ret void
}