[AArch64][CGP] Sink zext feeding stxr/stlxr into the same block.
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Fri, 22 May 2015 21:37:17 +0000 (21:37 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Fri, 22 May 2015 21:37:17 +0000 (21:37 +0000)
The usual CodeGenPrepare trickery, on a target-specific intrinsic.
Without this, the expansion of atomics usually leaves the zext
hoisted out of the loop, defeating the various patterns we have
to catch this precise case.

Differential Revision: http://reviews.llvm.org/D9930

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238054 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/CodeGenPrepare.cpp
test/CodeGen/AArch64/arm64-atomic.ll

index f37a2874b2565ac79ced410323b4d9ca9d0e2582..cf2b0a29b8402451168ef8e7ffe95b1b4fe2b788 100644 (file)
@@ -1397,6 +1397,16 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
       }
       return false;
     }
+    case Intrinsic::aarch64_stlxr:
+    case Intrinsic::aarch64_stxr: {
+      ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+      if (!ExtVal || !ExtVal->hasOneUse() ||
+          ExtVal->getParent() == CI->getParent())
+        return false;
+      // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
+      ExtVal->moveBefore(CI);
+      return true;
+    }
     }
 
     if (TLI) {
index fa07e9f2e91f016689351437af637f6f7ad3ef9a..9136fb6271b5ae7ad423aab64989180e3a3e6a91 100644 (file)
@@ -2,12 +2,11 @@
 
 define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
 ; CHECK-LABEL: val_compare_and_swap:
-; CHECK-NEXT: ubfx   x[[NEWVAL_REG:[0-9]+]], x2, #0, #32
 ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK-NEXT: ldaxr  [[RESULT:w[0-9]+]], [x0]
 ; CHECK-NEXT: cmp    [[RESULT]], w1
 ; CHECK-NEXT: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stxr   [[SCRATCH_REG:w[0-9]+]], w[[NEWVAL_REG]], [x0]
+; CHECK-NEXT: stxr   [[SCRATCH_REG:w[0-9]+]], w2, [x0]
 ; CHECK-NEXT: cbnz   [[SCRATCH_REG]], [[LABEL]]
 ; CHECK-NEXT: [[LABEL2]]:
   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
@@ -17,12 +16,11 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
 
 define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
 ; CHECK-LABEL: val_compare_and_swap_rel:
-; CHECK-NEXT: ubfx   x[[NEWVAL_REG:[0-9]+]], x2, #0, #32
 ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK-NEXT: ldaxr  [[RESULT:w[0-9]+]], [x0]
 ; CHECK-NEXT: cmp    [[RESULT]], w1
 ; CHECK-NEXT: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stlxr  [[SCRATCH_REG:w[0-9]+]], w[[NEWVAL_REG]], [x0]
+; CHECK-NEXT: stlxr  [[SCRATCH_REG:w[0-9]+]], w2, [x0]
 ; CHECK-NEXT: cbnz   [[SCRATCH_REG]], [[LABEL]]
 ; CHECK-NEXT: [[LABEL2]]:
   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic