1 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
2 ; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
3 ; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
5 ; This file checks that atomic (non-seq_cst) stores of immediate values are
6 ; done in one mov instruction and not 2. More precisely, it makes sure that the
7 ; immediate is not first copied uselessly into a register.
9 ; Similarly, it checks that a binary operation of an immediate with an atomic
10 ; variable that is stored back in that variable is done as a single instruction.
11 ; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
12 ; should be just an add instruction, instead of loading x into a register, doing
13 ; an add and storing the result back.
14 ; The binary operations supported are currently add, and, or, xor.
15 ; sub is not supported because it is translated into an addition of the
17 ; Finally, we also check the same kind of pattern for inc/dec
19 ; seq_cst stores are left as (lock) xchgl, but we try to check every other
20 ; attribute at least once.
22 ; Please note that these operations do not require the lock prefix: only
23 ; sequentially consistent stores require this kind of protection on X86.
24 ; And even for seq_cst operations, LLVM uses the xchg instruction which has
25 ; an implicit lock prefix, so making it explicit is not required.
27 define void @store_atomic_imm_8(i8* %p) {
28 ; X64-LABEL: store_atomic_imm_8
31 ; X32-LABEL: store_atomic_imm_8
34 store atomic i8 42, i8* %p release, align 1
38 define void @store_atomic_imm_16(i16* %p) {
39 ; X64-LABEL: store_atomic_imm_16
42 ; X32-LABEL: store_atomic_imm_16
45 store atomic i16 42, i16* %p monotonic, align 2
49 define void @store_atomic_imm_32(i32* %p) {
50 ; X64-LABEL: store_atomic_imm_32
53 ; On 32 bits, there is an extra movl for each of those functions
54 ; (probably for alignment reasons).
55 ; X32-LABEL: store_atomic_imm_32
56 ; X32: movl 4(%esp), %eax
59 store atomic i32 42, i32* %p release, align 4
63 define void @store_atomic_imm_64(i64* %p) {
64 ; X64-LABEL: store_atomic_imm_64
67 ; These are implemented with a CAS loop on 32 bit architectures, and thus
68 ; cannot be optimized in the same way as the others.
69 ; X32-LABEL: store_atomic_imm_64
71 store atomic i64 42, i64* %p release, align 8
75 ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov,
76 ; even on X64, one must use movabsq that can only target a register.
77 define void @store_atomic_imm_64_big(i64* %p) {
78 ; X64-LABEL: store_atomic_imm_64_big
81 store atomic i64 100000000000, i64* %p monotonic, align 8
85 ; It would be incorrect to replace a lock xchgl by a movl
86 define void @store_atomic_imm_32_seq_cst(i32* %p) {
87 ; X64-LABEL: store_atomic_imm_32_seq_cst
89 ; X32-LABEL: store_atomic_imm_32_seq_cst
91 store atomic i32 42, i32* %p seq_cst, align 4
97 define void @add_8(i8* %p) {
106 %1 = load atomic i8* %p seq_cst, align 1
108 store atomic i8 %2, i8* %p release, align 1
112 define void @add_16(i16* %p) {
113 ; Currently the transformation is not done on 16 bit accesses, as the backend
114 ; treats 16 bit arithmetic as expensive on X86/X86_64.
119 %1 = load atomic i16* %p acquire, align 2
121 store atomic i16 %2, i16* %p release, align 2
125 define void @add_32(i32* %p) {
134 %1 = load atomic i32* %p acquire, align 4
136 store atomic i32 %2, i32* %p monotonic, align 4
140 define void @add_64(i64* %p) {
145 ; We do not check X86-32 as it cannot do 'addq'.
147 %1 = load atomic i64* %p acquire, align 8
149 store atomic i64 %2, i64* %p release, align 8
153 define void @add_32_seq_cst(i32* %p) {
154 ; X64-LABEL: add_32_seq_cst
156 ; X32-LABEL: add_32_seq_cst
158 %1 = load atomic i32* %p monotonic, align 4
160 store atomic i32 %2, i32* %p seq_cst, align 4
166 define void @and_8(i8* %p) {
175 %1 = load atomic i8* %p monotonic, align 1
177 store atomic i8 %2, i8* %p release, align 1
181 define void @and_16(i16* %p) {
182 ; Currently the transformation is not done on 16 bit accesses, as the backend
183 ; treats 16 bit arithmetic as expensive on X86/X86_64.
188 %1 = load atomic i16* %p acquire, align 2
190 store atomic i16 %2, i16* %p release, align 2
194 define void @and_32(i32* %p) {
203 %1 = load atomic i32* %p acquire, align 4
205 store atomic i32 %2, i32* %p release, align 4
209 define void @and_64(i64* %p) {
214 ; We do not check X86-32 as it cannot do 'andq'.
216 %1 = load atomic i64* %p acquire, align 8
218 store atomic i64 %2, i64* %p release, align 8
222 define void @and_32_seq_cst(i32* %p) {
223 ; X64-LABEL: and_32_seq_cst
225 ; X32-LABEL: and_32_seq_cst
227 %1 = load atomic i32* %p monotonic, align 4
229 store atomic i32 %2, i32* %p seq_cst, align 4
235 define void @or_8(i8* %p) {
244 %1 = load atomic i8* %p acquire, align 1
246 store atomic i8 %2, i8* %p release, align 1
250 define void @or_16(i16* %p) {
255 %1 = load atomic i16* %p acquire, align 2
257 store atomic i16 %2, i16* %p release, align 2
261 define void @or_32(i32* %p) {
270 %1 = load atomic i32* %p acquire, align 4
272 store atomic i32 %2, i32* %p release, align 4
276 define void @or_64(i64* %p) {
281 ; We do not check X86-32 as it cannot do 'orq'.
283 %1 = load atomic i64* %p acquire, align 8
285 store atomic i64 %2, i64* %p release, align 8
289 define void @or_32_seq_cst(i32* %p) {
290 ; X64-LABEL: or_32_seq_cst
292 ; X32-LABEL: or_32_seq_cst
294 %1 = load atomic i32* %p monotonic, align 4
296 store atomic i32 %2, i32* %p seq_cst, align 4
302 define void @xor_8(i8* %p) {
311 %1 = load atomic i8* %p acquire, align 1
313 store atomic i8 %2, i8* %p release, align 1
317 define void @xor_16(i16* %p) {
322 %1 = load atomic i16* %p acquire, align 2
324 store atomic i16 %2, i16* %p release, align 2
328 define void @xor_32(i32* %p) {
337 %1 = load atomic i32* %p acquire, align 4
339 store atomic i32 %2, i32* %p release, align 4
343 define void @xor_64(i64* %p) {
348 ; We do not check X86-32 as it cannot do 'xorq'.
350 %1 = load atomic i64* %p acquire, align 8
352 store atomic i64 %2, i64* %p release, align 8
356 define void @xor_32_seq_cst(i32* %p) {
357 ; X64-LABEL: xor_32_seq_cst
359 ; X32-LABEL: xor_32_seq_cst
361 %1 = load atomic i32* %p monotonic, align 4
363 store atomic i32 %2, i32* %p seq_cst, align 4
369 define void @inc_8(i8* %p) {
378 ; SLOW_INC-LABEL: inc_8
381 %1 = load atomic i8* %p seq_cst, align 1
383 store atomic i8 %2, i8* %p release, align 1
387 define void @inc_16(i16* %p) {
388 ; Currently the transformation is not done on 16 bit accesses, as the backend
389 ; treats 16 bit arithmetic as expensive on X86/X86_64.
394 ; SLOW_INC-LABEL: inc_16
396 %1 = load atomic i16* %p acquire, align 2
398 store atomic i16 %2, i16* %p release, align 2
402 define void @inc_32(i32* %p) {
411 ; SLOW_INC-LABEL: inc_32
414 %1 = load atomic i32* %p acquire, align 4
416 store atomic i32 %2, i32* %p monotonic, align 4
420 define void @inc_64(i64* %p) {
425 ; We do not check X86-32 as it cannot do 'incq'.
427 ; SLOW_INC-LABEL: inc_64
430 %1 = load atomic i64* %p acquire, align 8
432 store atomic i64 %2, i64* %p release, align 8
436 define void @inc_32_seq_cst(i32* %p) {
437 ; X64-LABEL: inc_32_seq_cst
439 ; X32-LABEL: inc_32_seq_cst
441 %1 = load atomic i32* %p monotonic, align 4
443 store atomic i32 %2, i32* %p seq_cst, align 4
449 define void @dec_8(i8* %p) {
458 ; SLOW_INC-LABEL: dec_8
461 %1 = load atomic i8* %p seq_cst, align 1
463 store atomic i8 %2, i8* %p release, align 1
467 define void @dec_16(i16* %p) {
468 ; Currently the transformation is not done on 16 bit accesses, as the backend
469 ; treats 16 bit arithmetic as expensive on X86/X86_64.
474 ; SLOW_INC-LABEL: dec_16
476 %1 = load atomic i16* %p acquire, align 2
478 store atomic i16 %2, i16* %p release, align 2
482 define void @dec_32(i32* %p) {
491 ; SLOW_INC-LABEL: dec_32
494 %1 = load atomic i32* %p acquire, align 4
496 store atomic i32 %2, i32* %p monotonic, align 4
500 define void @dec_64(i64* %p) {
505 ; We do not check X86-32 as it cannot do 'decq'.
507 ; SLOW_INC-LABEL: dec_64
510 %1 = load atomic i64* %p acquire, align 8
512 store atomic i64 %2, i64* %p release, align 8
516 define void @dec_32_seq_cst(i32* %p) {
517 ; X64-LABEL: dec_32_seq_cst
519 ; X32-LABEL: dec_32_seq_cst
521 %1 = load atomic i32* %p monotonic, align 4
523 store atomic i32 %2, i32* %p seq_cst, align 4