From fd4c3c983ef77518dc660e9fe779c5497f591878 Mon Sep 17 00:00:00 2001 From: Robin Morisset Date: Tue, 23 Sep 2014 20:31:14 +0000 Subject: [PATCH] Add AtomicExpandPass::bracketInstWithFences, and use it whenever getInsertFencesForAtomic would trigger in SelectionDAGBuilder Summary: The goal is to eventually remove all the code related to getInsertFencesForAtomic in SelectionDAGBuilder as it is wrong (designed for ARM, not really portable, works mostly by accident because the backends are overly conservative), and repeats the same logic that goes in emitLeading/TrailingFence. In this patch, I make AtomicExpandPass insert the fences as it knows better where to put them. Because this requires getting the fences and not just passing an IRBuilder around, I had to change the return type of emitLeading/TrailingFence. This code only triggers on ARM for now. Because it is earlier in the pipeline than SelectionDAGBuilder, it triggers and lowers atomic accesses to atomic so SelectionDAGBuilder does not add barriers anymore on ARM. If this patch is accepted I plan to implement emitLeading/TrailingFence for all backends that setInsertFencesForAtomic(true), which will allow both making them less conservative and simplifying SelectionDAGBuilder once they are all using this interface. This should not cause any functional change so the existing tests are used and not modified. 
Test Plan: make check-all, benefits from existing tests of atomics on ARM Reviewers: jfb, t.p.northover Subscribers: aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D5179 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218329 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 12 ++- lib/CodeGen/AtomicExpandPass.cpp | 108 +++++++++++++++++---------- lib/Target/ARM/ARMISelLowering.cpp | 28 +++---- lib/Target/ARM/ARMISelLowering.h | 4 +- 4 files changed, 96 insertions(+), 56 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 0a7222599aa..97318d7f89c 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -968,9 +968,13 @@ public: /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad. /// RMW and CmpXchg set both IsStore and IsLoad to true. /// Backends with !getInsertFencesForAtomic() should keep a no-op here. - virtual void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + /// This function should either return a nullptr, or a pointer to an IR-level + /// Instruction*. Even complex fence sequences can be represented by a + /// single Instruction* through an intrinsic to be lowered later. + virtual Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { assert(!getInsertFencesForAtomic()); + return nullptr; } /// Inserts in the IR a target-specific intrinsic specifying a fence. @@ -978,9 +982,13 @@ public: /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad. /// RMW and CmpXchg set both IsStore and IsLoad to true. /// Backends with !getInsertFencesForAtomic() should keep a no-op here. - virtual void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + /// This function should either return a nullptr, or a pointer to an IR-level + /// Instruction*. 
Even complex fence sequences can be represented by a + /// single Instruction* through an intrinsic to be lowered later. + virtual Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { assert(!getInsertFencesForAtomic()); + return nullptr; } /// Returns true if the given (atomic) store should be expanded by the diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 91fa66be952..3225731c05c 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file contains a pass (at IR level) to replace atomic instructions with -// either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg. +// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg. // //===----------------------------------------------------------------------===// @@ -41,6 +41,8 @@ namespace { bool runOnFunction(Function &F) override; private: + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad); bool expandAtomicLoad(LoadInst *LI); bool expandAtomicStore(StoreInst *SI); bool expandAtomicRMW(AtomicRMWInst *AI); @@ -80,10 +82,45 @@ bool AtomicExpand::runOnFunction(Function &F) { auto SI = dyn_cast(I); auto RMWI = dyn_cast(I); auto CASI = dyn_cast(I); - assert((LI || SI || RMWI || CASI || isa(I)) && "Unknown atomic instruction"); + auto FenceOrdering = Monotonic; + bool IsStore, IsLoad; + if (TargetLowering->getInsertFencesForAtomic()) { + if (LI && isAtLeastAcquire(LI->getOrdering())) { + FenceOrdering = LI->getOrdering(); + LI->setOrdering(Monotonic); + IsStore = false; + IsLoad = true; + } else if (SI && isAtLeastRelease(SI->getOrdering())) { + FenceOrdering = SI->getOrdering(); + SI->setOrdering(Monotonic); + IsStore = true; + IsLoad = false; + } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) || + 
isAtLeastAcquire(RMWI->getOrdering()))) { + FenceOrdering = RMWI->getOrdering(); + RMWI->setOrdering(Monotonic); + IsStore = IsLoad = true; + } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() && + (isAtLeastRelease(CASI->getSuccessOrdering()) || + isAtLeastAcquire(CASI->getSuccessOrdering()))) { + // If a compare and swap is lowered to LL/SC, we can do smarter fence + // insertion, with a stronger one on the success path than on the + // failure path. As a result, fence insertion is directly done by + // expandAtomicCmpXchg in that case. + FenceOrdering = CASI->getSuccessOrdering(); + CASI->setSuccessOrdering(Monotonic); + CASI->setFailureOrdering(Monotonic); + IsStore = IsLoad = true; + } + + if (FenceOrdering != Monotonic) { + MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); + } + } + if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) { MadeChange |= expandAtomicLoad(LI); } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) { @@ -97,30 +134,40 @@ bool AtomicExpand::runOnFunction(Function &F) { return MadeChange; } +bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad) { + IRBuilder<> Builder(I); + + auto LeadingFence = + TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence( + Builder, Order, IsStore, IsLoad); + + auto TrailingFence = + TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence( + Builder, Order, IsStore, IsLoad); + // The trailing fence is emitted before the instruction instead of after + // because there is no easy way of setting Builder insertion point after + // an instruction. So we must erase it from the BB, and insert it back + // in the right place. + // We have a guard here because not every atomic operation generates a + // trailing fence. 
+ if (TrailingFence) { + TrailingFence->removeFromParent(); + TrailingFence->insertAfter(I); + } + + return (LeadingFence || TrailingFence); +} + bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { auto TLI = TM->getSubtargetImpl()->getTargetLowering(); - // If getInsertFencesForAtomic() returns true, then the target does not want - // to deal with memory orders, and emitLeading/TrailingFence should take care - // of everything. Otherwise, emitLeading/TrailingFence are no-op and we - // should preserve the ordering. - AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); IRBuilder<> Builder(LI); - // Note that although no fence is required before atomic load on ARM, it is - // required before SequentiallyConsistent loads for the recommended Power - // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html). - // So we let the target choose what to emit. - TLI->emitLeadingFence(Builder, LI->getOrdering(), - /*IsStore=*/false, /*IsLoad=*/true); - - // The only 64-bit load guaranteed to be single-copy atomic by ARM is - // an ldrexd (A3.5.3). + // On some architectures, load-linked instructions are atomic for larger + // sizes than normal loads. For example, the only 64-bit load guaranteed + // to be single-copy atomic by ARM is an ldrexd (A3.5.3). 
Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder); - - TLI->emitTrailingFence(Builder, LI->getOrdering(), - /*IsStore=*/false, /*IsLoad=*/true); + TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); LI->replaceAllUsesWith(Val); LI->eraseFromParent(); @@ -193,17 +240,11 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { auto TLI = TM->getSubtargetImpl()->getTargetLowering(); - AtomicOrdering FenceOrder = AI->getOrdering(); + AtomicOrdering MemOpOrder = AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); - // If getInsertFencesForAtomic() returns true, then the target does not want - // to deal with memory orders, and emitLeading/TrailingFence should take care - // of everything. Otherwise, emitLeading/TrailingFence are no-op and we - // should preserve the ordering. - AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder; // Given: atomicrmw some_op iN* %addr, iN %incr ordering // @@ -230,7 +271,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. 
@@ -247,7 +287,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); @@ -256,11 +295,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { } bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { - auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); - AtomicOrdering FenceOrder = - AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); AtomicOrdering MemOpOrder = - TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder; + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); Function *F = BB->getParent(); @@ -292,8 +328,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - TargetLowering->emitLeadingFence(Builder, FenceOrder, - /*IsStore=*/true, /*IsLoad=*/true); LoadInst *InitLoaded = Builder.CreateLoad(Addr); // Atomics require at least natural alignment. 
InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); @@ -317,8 +351,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { Builder.CreateCondBr(Success, ExitBB, LoopBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - TargetLowering->emitTrailingFence(Builder, FenceOrder, - /*IsStore=*/true, /*IsLoad=*/true); AI->replaceAllUsesWith(NewLoaded); AI->eraseFromParent(); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 97b62264462..ec59ec50a1d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -11024,11 +11024,11 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, } // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html -void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, +Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (!getInsertFencesForAtomic()) - return; + return nullptr; switch (Ord) { case NotAtomic: @@ -11036,27 +11036,27 @@ void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, llvm_unreachable("Invalid fence: unordered/non-atomic"); case Monotonic: case Acquire: - return; // Nothing to do + return nullptr; // Nothing to do case SequentiallyConsistent: if (!IsStore) - return; // Nothing to do - /*FALLTHROUGH*/ + return nullptr; // Nothing to do + /*FALLTHROUGH*/ case Release: case AcquireRelease: if (Subtarget->isSwift()) - makeDMB(Builder, ARM_MB::ISHST); + return makeDMB(Builder, ARM_MB::ISHST); // FIXME: add a comment with a link to documentation justifying this. 
else - makeDMB(Builder, ARM_MB::ISH); - return; + return makeDMB(Builder, ARM_MB::ISH); } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); } -void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, +Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (!getInsertFencesForAtomic()) - return; + return nullptr; switch (Ord) { case NotAtomic: @@ -11064,13 +11064,13 @@ void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, llvm_unreachable("Invalid fence: unordered/not-atomic"); case Monotonic: case Release: - return; // Nothing to do + return nullptr; // Nothing to do case Acquire: case AcquireRelease: - case SequentiallyConsistent: - makeDMB(Builder, ARM_MB::ISH); - return; + case SequentiallyConsistent: + return makeDMB(Builder, ARM_MB::ISH); } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); } // Loads and stores less than 64-bits are already atomic; ones above that diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d5483553898..0dd4ca260a5 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -399,9 +399,9 @@ namespace llvm { Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; - void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; -- 2.34.1