Add AtomicExpandPass::bracketInstWithFences, and use it whenever getInsertFencesForAtomic...
author     Robin Morisset <morisset@google.com>
           Tue, 23 Sep 2014 20:31:14 +0000 (20:31 +0000)
committer  Robin Morisset <morisset@google.com>
           Tue, 23 Sep 2014 20:31:14 +0000 (20:31 +0000)
Summary:
The goal is to eventually remove all the code related to getInsertFencesForAtomic
in SelectionDAGBuilder, as it is wrong (it was designed for ARM, is not really
portable, and works mostly by accident because the backends are overly
conservative), and it duplicates the logic that belongs in
emitLeading/TrailingFence.

In this patch, I make AtomicExpandPass insert the fences, as it knows better
where to put them. Because this requires getting hold of the emitted fences,
and not just passing an IRBuilder around, I had to change the return type of
emitLeading/TrailingFence.
This code only triggers on ARM for now. Because AtomicExpandPass runs earlier
in the pipeline than SelectionDAGBuilder, it triggers first and lowers the
atomic accesses to atomic accesses with monotonic ordering, so
SelectionDAGBuilder no longer adds barriers on ARM.
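
As a reading aid, here is the caller-side shape of the new contract, condensed
from the bracketInstWithFences code in the diff below. This is a sketch rather
than the literal pass code, and assumes a target for which
getInsertFencesForAtomic() returns true:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  // Both hooks now return the fence they emitted (or nullptr). Both fences
  // are created before I, so the trailing one must be moved after it; the
  // pass relaxes I's own ordering to Monotonic separately, in runOnFunction.
  static bool bracketWithFences(const TargetLowering &TLI, Instruction *I,
                                AtomicOrdering Ord, bool IsStore, bool IsLoad) {
    IRBuilder<> Builder(I);
    Instruction *Leading = TLI.emitLeadingFence(Builder, Ord, IsStore, IsLoad);
    Instruction *Trailing = TLI.emitTrailingFence(Builder, Ord, IsStore, IsLoad);
    if (Trailing) { // Not every ordering produces a trailing fence.
      Trailing->removeFromParent();
      Trailing->insertAfter(I);
    }
    return Leading || Trailing;
  }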

If this patch is accepted, I plan to implement emitLeading/TrailingFence for
all backends that call setInsertFencesForAtomic(true), which will allow both
making them less conservative and simplifying SelectionDAGBuilder once they
are all using this interface.
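
For a backend without special barrier intrinsics, such an implementation could
be as small as materializing ordinary IR fence instructions. The following is
a purely hypothetical sketch, written as a free function for brevity (it is
not part of this patch; the fence placement mirrors the ARM rules below):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  // Hypothetical leading-fence logic for a future fence-inserting backend:
  // a plain IR 'fence' stands in for a target intrinsic such as ARM's dmb,
  // and the returned FenceInst satisfies the new Instruction* interface.
  static Instruction *emitLeadingFenceSketch(const TargetLowering &TLI,
                                             IRBuilder<> &Builder,
                                             AtomicOrdering Ord, bool IsStore) {
    if (!TLI.getInsertFencesForAtomic())
      return nullptr;
    // A fence is needed before release-or-stronger writes; a seq_cst load
    // takes its fence after the access instead (trailing-fence side).
    if (Ord == Release || Ord == AcquireRelease ||
        (Ord == SequentiallyConsistent && IsStore))
      return Builder.CreateFence(Release);
    return nullptr;
  }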

This should not cause any functional change, so the existing tests are used
and not modified.

Test Plan: make check-all, benefits from existing tests of atomics on ARM

Reviewers: jfb, t.p.northover

Subscribers: aemerson, llvm-commits

Differential Revision: http://reviews.llvm.org/D5179

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218329 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Target/TargetLowering.h
lib/CodeGen/AtomicExpandPass.cpp
lib/Target/ARM/ARMISelLowering.cpp
lib/Target/ARM/ARMISelLowering.h

index 0a7222599aac4e094f9b0ac3dfa7beeee2979b65..97318d7f89cf0a5e3f6021df43bfc63e578be786 100644
@@ -968,9 +968,13 @@ public:
   ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
   /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
-  virtual void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+  /// This function should either return a nullptr, or a pointer to an IR-level
+  ///   Instruction*. Even complex fence sequences can be represented by a
+  ///   single Instruction* through an intrinsic to be lowered later.
+  virtual Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
           bool IsStore, bool IsLoad) const {
     assert(!getInsertFencesForAtomic());
+    return nullptr;
   }
 
   /// Inserts in the IR a target-specific intrinsic specifying a fence.
@@ -978,9 +982,13 @@ public:
   ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
   /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
-  virtual void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+  /// This function should either return a nullptr, or a pointer to an IR-level
+  ///   Instruction*. Even complex fence sequences can be represented by a
+  ///   single Instruction* through an intrinsic to be lowered later.
+  virtual Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
           bool IsStore, bool IsLoad) const {
     assert(!getInsertFencesForAtomic());
+    return nullptr;
   }
 
   /// Returns true if the given (atomic) store should be expanded by the
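
One note on the "single Instruction*" wording in the comment above: a target
whose fence sequence is more than one instruction can still comply by emitting
a single call that is lowered to the full sequence later. A purely
hypothetical illustration (the "my.double.barrier" function is invented for
this sketch; a real target would declare one of its own intrinsics):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Hypothetical: package a multi-instruction fence sequence behind one
  // call, so a single Instruction* can be returned; the backend would expand
  // the call into the real barrier sequence during instruction selection.
  static Instruction *emitPackagedFence(IRBuilder<> &Builder) {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
    Constant *Barrier = M->getOrInsertFunction(
        "my.double.barrier", Type::getVoidTy(M->getContext()), nullptr);
    return Builder.CreateCall(Barrier);
  }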
index 91fa66be952ba48b81400004c94c7ab0eae2db09..3225731c05ccf33ee5df337bdf0d984676133b32 100644
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
+// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
 //
 //===----------------------------------------------------------------------===//
 
@@ -41,6 +41,8 @@ namespace {
     bool runOnFunction(Function &F) override;
 
   private:
+    bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+                               bool IsStore, bool IsLoad);
     bool expandAtomicLoad(LoadInst *LI);
     bool expandAtomicStore(StoreInst *SI);
     bool expandAtomicRMW(AtomicRMWInst *AI);
@@ -80,10 +82,45 @@ bool AtomicExpand::runOnFunction(Function &F) {
     auto SI = dyn_cast<StoreInst>(I);
     auto RMWI = dyn_cast<AtomicRMWInst>(I);
     auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
-
     assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
            "Unknown atomic instruction");
 
+    auto FenceOrdering = Monotonic;
+    bool IsStore, IsLoad;
+    if (TargetLowering->getInsertFencesForAtomic()) {
+      if (LI && isAtLeastAcquire(LI->getOrdering())) {
+        FenceOrdering = LI->getOrdering();
+        LI->setOrdering(Monotonic);
+        IsStore = false;
+        IsLoad = true;
+      } else if (SI && isAtLeastRelease(SI->getOrdering())) {
+        FenceOrdering = SI->getOrdering();
+        SI->setOrdering(Monotonic);
+        IsStore = true;
+        IsLoad = false;
+      } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
+                          isAtLeastAcquire(RMWI->getOrdering()))) {
+        FenceOrdering = RMWI->getOrdering();
+        RMWI->setOrdering(Monotonic);
+        IsStore = IsLoad = true;
+      } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
+                    (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+                     isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+        // If a compare and swap is lowered to LL/SC, we can do smarter fence
+        // insertion, with a stronger one on the success path than on the
+        // failure path. As a result, fence insertion is directly done by
+        // expandAtomicCmpXchg in that case.
+        FenceOrdering = CASI->getSuccessOrdering();
+        CASI->setSuccessOrdering(Monotonic);
+        CASI->setFailureOrdering(Monotonic);
+        IsStore = IsLoad = true;
+      }
+
+      if (FenceOrdering != Monotonic) {
+        MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+      }
+    }
+
     if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
       MadeChange |= expandAtomicLoad(LI);
     } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
@@ -97,30 +134,40 @@ bool AtomicExpand::runOnFunction(Function &F) {
   return MadeChange;
 }
 
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+                                         bool IsStore, bool IsLoad) {
+  IRBuilder<> Builder(I);
+
+  auto LeadingFence =
+      TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
+      Builder, Order, IsStore, IsLoad);
+
+  auto TrailingFence =
+      TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
+      Builder, Order, IsStore, IsLoad);
+  // The trailing fence is emitted before the instruction instead of after
+  // because there is no easy way of setting Builder insertion point after
+  // an instruction. So we must erase it from the BB, and insert it back
+  // in the right place.
+  // We have a guard here because not every atomic operation generates a
+  // trailing fence.
+  if (TrailingFence) {
+    TrailingFence->removeFromParent();
+    TrailingFence->insertAfter(I);
+  }
+
+  return (LeadingFence || TrailingFence);
+}
+
 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-  // If getInsertFencesForAtomic() returns true, then the target does not want
-  // to deal with memory orders, and emitLeading/TrailingFence should take care
-  // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
-  // should preserve the ordering.
-  AtomicOrdering MemOpOrder =
-      TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
   IRBuilder<> Builder(LI);
 
-  // Note that although no fence is required before atomic load on ARM, it is
-  // required before SequentiallyConsistent loads for the recommended Power
-  // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
-  // So we let the target choose what to emit.
-  TLI->emitLeadingFence(Builder, LI->getOrdering(),
-                        /*IsStore=*/false, /*IsLoad=*/true);
-
-  // The only 64-bit load guaranteed to be single-copy atomic by ARM is
-  // an ldrexd (A3.5.3).
+  // On some architectures, load-linked instructions are atomic for larger
+  // sizes than normal loads. For example, the only 64-bit load guaranteed
+  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val =
-      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
-
-  TLI->emitTrailingFence(Builder, LI->getOrdering(),
-                         /*IsStore=*/false, /*IsLoad=*/true);
+      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
 
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -193,17 +240,11 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
 
 bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-  AtomicOrdering FenceOrder = AI->getOrdering();
+  AtomicOrdering MemOpOrder = AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
-  // If getInsertFencesForAtomic() returns true, then the target does not want
-  // to deal with memory orders, and emitLeading/TrailingFence should take care
-  // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
-  // should preserve the ordering.
-  AtomicOrdering MemOpOrder =
-      TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
 
   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
   //
@@ -230,7 +271,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
@@ -247,7 +287,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
 
   AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();
@@ -256,11 +295,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
 }
 
 bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
-  auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
-  AtomicOrdering FenceOrder =
-      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
   AtomicOrdering MemOpOrder =
-      TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
+      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
@@ -292,8 +328,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  TargetLowering->emitLeadingFence(Builder, FenceOrder,
-                                   /*IsStore=*/true, /*IsLoad=*/true);
   LoadInst *InitLoaded = Builder.CreateLoad(Addr);
   // Atomics require at least natural alignment.
   InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
@@ -317,8 +351,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
   Builder.CreateCondBr(Success, ExitBB, LoopBB);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  TargetLowering->emitTrailingFence(Builder, FenceOrder,
-                                    /*IsStore=*/true, /*IsLoad=*/true);
 
   AI->replaceAllUsesWith(NewLoaded);
   AI->eraseFromParent();
index 97b62264462d6a1b53f532e01d5e9f4736963c18..ec59ec50a1da9b3aa378893b302b4c7e2e7860b9 100644
@@ -11024,11 +11024,11 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
 }
 
 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
-void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
   if (!getInsertFencesForAtomic())
-    return;
+    return nullptr;
 
   switch (Ord) {
   case NotAtomic:
@@ -11036,27 +11036,27 @@ void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
     llvm_unreachable("Invalid fence: unordered/non-atomic");
   case Monotonic:
   case Acquire:
-    return; // Nothing to do
+    return nullptr; // Nothing to do
   case SequentiallyConsistent:
     if (!IsStore)
-      return; // Nothing to do
-              /*FALLTHROUGH*/
+      return nullptr; // Nothing to do
+    /*FALLTHROUGH*/
   case Release:
   case AcquireRelease:
     if (Subtarget->isSwift())
-      makeDMB(Builder, ARM_MB::ISHST);
+      return makeDMB(Builder, ARM_MB::ISHST);
     // FIXME: add a comment with a link to documentation justifying this.
     else
-      makeDMB(Builder, ARM_MB::ISH);
-    return;
+      return makeDMB(Builder, ARM_MB::ISH);
   }
+  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
 }
 
-void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                           AtomicOrdering Ord, bool IsStore,
                                           bool IsLoad) const {
   if (!getInsertFencesForAtomic())
-    return;
+    return nullptr;
 
   switch (Ord) {
   case NotAtomic:
@@ -11064,13 +11064,13 @@ void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
     llvm_unreachable("Invalid fence: unordered/not-atomic");
   case Monotonic:
   case Release:
-    return; // Nothing to do
+    return nullptr; // Nothing to do
   case Acquire:
   case AcquireRelease:
-    case SequentiallyConsistent:
-    makeDMB(Builder, ARM_MB::ISH);
-    return;
+  case SequentiallyConsistent:
+    return makeDMB(Builder, ARM_MB::ISH);
   }
+  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
 }
 
 // Loads and stores less than 64-bits are already atomic; ones above that
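
For context, makeDMB (referenced throughout this hunk, with only its signature
visible above) essentially materializes a call to the @llvm.arm.dmb intrinsic
with the barrier domain as an immediate. A simplified sketch, omitting the
fallback the real function has for subtargets without DMB:

  #include "MCTargetDesc/ARMBaseInfo.h" // ARM backend header, for ARM_MB
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Simplified sketch of what makeDMB does on subtargets that have DMB; the
  // real function also handles pre-DMB targets via an MCR-based fallback.
  static Instruction *makeDMBSketch(IRBuilder<> &Builder,
                                    ARM_MB::MemBOpt Domain) {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
    Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
    return Builder.CreateCall(DMB, Builder.getInt32(Domain)); // e.g. ISH
  }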
index d5483553898d82c2fd8cc7d615ca40634dd5a857..0dd4ca260a50f309595ef8890e211dc540cb08fb 100644
@@ -399,9 +399,9 @@ namespace llvm {
     Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                 Value *Addr, AtomicOrdering Ord) const override;
 
-    void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                           bool IsStore, bool IsLoad) const override;
-    void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                            bool IsStore, bool IsLoad) const override;
 
     bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;