1 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
2 ; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
3 ; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
5 ; This file checks that atomic (non-seq_cst) stores of immediate values are
6 ; done in one mov instruction and not 2. More precisely, it makes sure that the
7 ; immediate is not first copied uselessly into a register.
9 ; Similarly, it checks that a binary operation of an immediate with an atomic
10 ; variable that is stored back in that variable is done as a single instruction.
11 ; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
12 ; should be just an add instruction, instead of loading x into a register, doing
13 ; an add and storing the result back.
14 ; The binary operations supported are currently add, and, or, xor.
15 ; sub is not supported because it is translated into an addition of the
17 ; Finally, we also check the same kind of pattern for inc/dec
19 ; seq_cst stores are left as (lock) xchgl, but we try to check every other
20 ; attribute at least once.
22 ; Please note that these operations do not require the lock prefix: only
23 ; sequentially consistent stores require this kind of protection on X86.
24 ; And even for seq_cst operations, LLVM uses the xchg instruction which has
25 ; an implicit lock prefix, so making it explicit is not required.
27 define void @store_atomic_imm_8(i8* %p) {
28 ; X64-LABEL: store_atomic_imm_8
31 ; X32-LABEL: store_atomic_imm_8
34 store atomic i8 42, i8* %p release, align 1
38 define void @store_atomic_imm_16(i16* %p) {
39 ; X64-LABEL: store_atomic_imm_16
42 ; X32-LABEL: store_atomic_imm_16
45 store atomic i16 42, i16* %p monotonic, align 2
49 define void @store_atomic_imm_32(i32* %p) {
50 ; X64-LABEL: store_atomic_imm_32
53 ; On 32 bits, there is an extra movl for each of those functions
54 ; (probably for alignment reasons).
55 ; X32-LABEL: store_atomic_imm_32
56 ; X32: movl 4(%esp), %eax
59 store atomic i32 42, i32* %p release, align 4
63 define void @store_atomic_imm_64(i64* %p) {
64 ; X64-LABEL: store_atomic_imm_64
67 ; These are implemented with a CAS loop on 32 bit architectures, and thus
68 ; cannot be optimized in the same way as the others.
69 ; X32-LABEL: store_atomic_imm_64
71 store atomic i64 42, i64* %p release, align 8
75 ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov,
76 ; even on X64, one must use movabsq that can only target a register.
77 define void @store_atomic_imm_64_big(i64* %p) {
78 ; X64-LABEL: store_atomic_imm_64_big
81 store atomic i64 100000000000, i64* %p monotonic, align 8
85 ; It would be incorrect to replace a lock xchgl by a movl
86 define void @store_atomic_imm_32_seq_cst(i32* %p) {
87 ; X64-LABEL: store_atomic_imm_32_seq_cst
89 ; X32-LABEL: store_atomic_imm_32_seq_cst
91 store atomic i32 42, i32* %p seq_cst, align 4
97 define void @add_8(i8* %p) {
106 %1 = load atomic i8* %p seq_cst, align 1
108 store atomic i8 %2, i8* %p release, align 1
112 define void @add_16(i16* %p) {
113 ; Currently the transformation is not done on 16 bit accesses, as the backend
114 ; treats 16 bit arithmetic as expensive on X86/X86_64.
119 %1 = load atomic i16* %p acquire, align 2
121 store atomic i16 %2, i16* %p release, align 2
125 define void @add_32(i32* %p) {
134 %1 = load atomic i32* %p acquire, align 4
136 store atomic i32 %2, i32* %p monotonic, align 4
140 define void @add_64(i64* %p) {
145 ; We do not check X86-32 as it cannot do 'addq'.
147 %1 = load atomic i64* %p acquire, align 8
149 store atomic i64 %2, i64* %p release, align 8
153 define void @add_32_seq_cst(i32* %p) {
154 ; X64-LABEL: add_32_seq_cst
156 ; X32-LABEL: add_32_seq_cst
158 %1 = load atomic i32* %p monotonic, align 4
160 store atomic i32 %2, i32* %p seq_cst, align 4
166 define void @and_8(i8* %p) {
175 %1 = load atomic i8* %p monotonic, align 1
177 store atomic i8 %2, i8* %p release, align 1
181 define void @and_16(i16* %p) {
182 ; Currently the transformation is not done on 16 bit accesses, as the backend
183 ; treats 16 bit arithmetic as expensive on X86/X86_64.
188 %1 = load atomic i16* %p acquire, align 2
190 store atomic i16 %2, i16* %p release, align 2
194 define void @and_32(i32* %p) {
203 %1 = load atomic i32* %p acquire, align 4
205 store atomic i32 %2, i32* %p release, align 4
209 define void @and_64(i64* %p) {
214 ; We do not check X86-32 as it cannot do 'andq'.
216 %1 = load atomic i64* %p acquire, align 8
218 store atomic i64 %2, i64* %p release, align 8
222 define void @and_32_seq_cst(i32* %p) {
223 ; X64-LABEL: and_32_seq_cst
225 ; X32-LABEL: and_32_seq_cst
227 %1 = load atomic i32* %p monotonic, align 4
229 store atomic i32 %2, i32* %p seq_cst, align 4
235 define void @or_8(i8* %p) {
244 %1 = load atomic i8* %p acquire, align 1
246 store atomic i8 %2, i8* %p release, align 1
250 define void @or_16(i16* %p) {
255 %1 = load atomic i16* %p acquire, align 2
257 store atomic i16 %2, i16* %p release, align 2
261 define void @or_32(i32* %p) {
270 %1 = load atomic i32* %p acquire, align 4
272 store atomic i32 %2, i32* %p release, align 4
276 define void @or_64(i64* %p) {
281 ; We do not check X86-32 as it cannot do 'orq'.
283 %1 = load atomic i64* %p acquire, align 8
285 store atomic i64 %2, i64* %p release, align 8
289 define void @or_32_seq_cst(i32* %p) {
290 ; X64-LABEL: or_32_seq_cst
292 ; X32-LABEL: or_32_seq_cst
294 %1 = load atomic i32* %p monotonic, align 4
296 store atomic i32 %2, i32* %p seq_cst, align 4
302 define void @xor_8(i8* %p) {
311 %1 = load atomic i8* %p acquire, align 1
313 store atomic i8 %2, i8* %p release, align 1
317 define void @xor_16(i16* %p) {
322 %1 = load atomic i16* %p acquire, align 2
324 store atomic i16 %2, i16* %p release, align 2
328 define void @xor_32(i32* %p) {
337 %1 = load atomic i32* %p acquire, align 4
339 store atomic i32 %2, i32* %p release, align 4
343 define void @xor_64(i64* %p) {
348 ; We do not check X86-32 as it cannot do 'xorq'.
350 %1 = load atomic i64* %p acquire, align 8
352 store atomic i64 %2, i64* %p release, align 8
356 define void @xor_32_seq_cst(i32* %p) {
357 ; X64-LABEL: xor_32_seq_cst
359 ; X32-LABEL: xor_32_seq_cst
361 %1 = load atomic i32* %p monotonic, align 4
363 store atomic i32 %2, i32* %p seq_cst, align 4
369 define void @inc_8(i8* %p) {
378 ; SLOW_INC-LABEL: inc_8
381 %1 = load atomic i8* %p seq_cst, align 1
383 store atomic i8 %2, i8* %p release, align 1
387 define void @inc_16(i16* %p) {
388 ; Currently the transformation is not done on 16 bit accesses, as the backend
389 ; treats 16 bit arithmetic as expensive on X86/X86_64.
394 ; SLOW_INC-LABEL: inc_16
396 %1 = load atomic i16* %p acquire, align 2
398 store atomic i16 %2, i16* %p release, align 2
402 define void @inc_32(i32* %p) {
411 ; SLOW_INC-LABEL: inc_32
414 %1 = load atomic i32* %p acquire, align 4
416 store atomic i32 %2, i32* %p monotonic, align 4
420 define void @inc_64(i64* %p) {
425 ; We do not check X86-32 as it cannot do 'incq'.
427 ; SLOW_INC-LABEL: inc_64
430 %1 = load atomic i64* %p acquire, align 8
432 store atomic i64 %2, i64* %p release, align 8
436 define void @inc_32_seq_cst(i32* %p) {
437 ; X64-LABEL: inc_32_seq_cst
439 ; X32-LABEL: inc_32_seq_cst
441 %1 = load atomic i32* %p monotonic, align 4
443 store atomic i32 %2, i32* %p seq_cst, align 4
449 define void @dec_8(i8* %p) {
458 ; SLOW_INC-LABEL: dec_8
461 %1 = load atomic i8* %p seq_cst, align 1
463 store atomic i8 %2, i8* %p release, align 1
467 define void @dec_16(i16* %p) {
468 ; Currently the transformation is not done on 16 bit accesses, as the backend
469 ; treats 16 bit arithmetic as expensive on X86/X86_64.
474 ; SLOW_INC-LABEL: dec_16
476 %1 = load atomic i16* %p acquire, align 2
478 store atomic i16 %2, i16* %p release, align 2
482 define void @dec_32(i32* %p) {
491 ; SLOW_INC-LABEL: dec_32
494 %1 = load atomic i32* %p acquire, align 4
496 store atomic i32 %2, i32* %p monotonic, align 4
500 define void @dec_64(i64* %p) {
505 ; We do not check X86-32 as it cannot do 'decq'.
507 ; SLOW_INC-LABEL: dec_64
510 %1 = load atomic i64* %p acquire, align 8
512 store atomic i64 %2, i64* %p release, align 8
516 define void @dec_32_seq_cst(i32* %p) {
517 ; X64-LABEL: dec_32_seq_cst
519 ; X32-LABEL: dec_32_seq_cst
521 %1 = load atomic i32* %p monotonic, align 4
523 store atomic i32 %2, i32* %p seq_cst, align 4