From: Robin Morisset <morisset@google.com>
Date: Thu, 21 Aug 2014 21:50:01 +0000 (+0000)
Subject: Rename AtomicExpandLoadLinked into AtomicExpand
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=cf165c36eea407c7855367738a533349c1abe4bc;p=oota-llvm.git

Rename AtomicExpandLoadLinked into AtomicExpand

AtomicExpandLoadLinked is currently rather ARM-specific. This patch is the
first of a group that aim at making it more target-independent. See
http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075873.html
for details

The command line option is "atomic-expand"

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216231 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 0869e3e1857..31fba89c7ac 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -345,7 +345,7 @@ protected:
 /// List of target independent CodeGen pass IDs.
 namespace llvm {
-  FunctionPass *createAtomicExpandLoadLinkedPass(const TargetMachine *TM);
+  FunctionPass *createAtomicExpandPass(const TargetMachine *TM);
 
   /// \brief Create a basic TargetTransformInfo analysis pass.
   ///
@@ -372,8 +372,9 @@ namespace llvm {
   /// matching during instruction selection.
   FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr);
 
-  /// AtomicExpandLoadLinkedID -- FIXME
-  extern char &AtomicExpandLoadLinkedID;
+  /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg or
+  /// load-linked/store-conditional loops.
+  extern char &AtomicExpandID;
 
   /// MachineLoopInfo - This pass is a loop analysis pass.
extern char &MachineLoopInfoID; diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 01cc081bfa4..aaf9364baf6 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -71,7 +71,7 @@ void initializeAliasDebuggerPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlwaysInlinerPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); -void initializeAtomicExpandLoadLinkedPass(PassRegistry&); +void initializeAtomicExpandPass(PassRegistry&); void initializeSampleProfileLoaderPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAliasAnalysisPass(PassRegistry&); diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index a9eb1736041..3bcc71b706a 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -937,7 +937,7 @@ public: /// @} //===--------------------------------------------------------------------===// - /// \name Helpers for load-linked/store-conditional atomic expansion. + /// \name Helpers for atomic expansion. /// @{ /// Perform a load-linked operation on Addr, returning a "Value *" with the @@ -957,7 +957,7 @@ public: } /// Return true if the given (atomic) instruction should be expanded by the - /// IR-level AtomicExpandLoadLinked pass into a loop involving + /// IR-level AtomicExpand pass into a loop involving /// load-linked/store-conditional pairs. Atomic stores will be expanded in the /// same way as "atomic xchg" operations which ignore their output if needed. 
virtual bool shouldExpandAtomicInIR(Instruction *Inst) const { diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index 45a93309501..81f6c1eadd1 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -118,7 +118,7 @@ public: virtual bool enablePostMachineScheduler() const; /// \brief True if the subtarget should run the atomic expansion pass. - virtual bool enableAtomicExpandLoadLinked() const; + virtual bool enableAtomicExpand() const; /// \brief Override generic scheduling policy within a region. /// diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp deleted file mode 100644 index 5c40069fd66..00000000000 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ /dev/null @@ -1,384 +0,0 @@ -//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass (at IR level) to replace atomic instructions with -// appropriate (intrinsic-based) ldrex/strex loops. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "arm-atomic-expand" - -namespace { - class AtomicExpandLoadLinked : public FunctionPass { - const TargetMachine *TM; - public: - static char ID; // Pass identification, replacement for typeid - explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { - initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - bool expandAtomicInsts(Function &F); - - bool expandAtomicLoad(LoadInst *LI); - bool expandAtomicStore(StoreInst *LI); - bool expandAtomicRMW(AtomicRMWInst *AI); - bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - - AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - }; -} - -char AtomicExpandLoadLinked::ID = 0; -char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID; -INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc", - "Expand Atomic calls in terms of load-linked & store-conditional", - false, false) - -FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { - return new AtomicExpandLoadLinked(TM); -} - -bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) - return false; - - SmallVector<Instruction *, 1> AtomicInsts; - - // Changing control-flow while iterating through it is a bad idea, so gather a - // list of all atomic instructions before we start. 
- for (BasicBlock &BB : F) - for (Instruction &Inst : BB) { - if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) || - (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) || - (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) - AtomicInsts.push_back(&Inst); - } - - bool MadeChange = false; - for (Instruction *Inst : AtomicInsts) { - if (!TM->getSubtargetImpl()->getTargetLowering()->shouldExpandAtomicInIR( - Inst)) - continue; - - if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) - MadeChange |= expandAtomicRMW(AI); - else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst)) - MadeChange |= expandAtomicCmpXchg(CI); - else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - MadeChange |= expandAtomicLoad(LI); - else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - MadeChange |= expandAtomicStore(SI); - else - llvm_unreachable("Unknown atomic instruction"); - } - - return MadeChange; -} - -bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { - // Load instructions don't actually need a leading fence, even in the - // SequentiallyConsistent case. - AtomicOrdering MemOpOrder = - TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic() - ? Monotonic - : LI->getOrdering(); - - // The only 64-bit load guaranteed to be single-copy atomic by the ARM is - // an ldrexd (A3.5.3). - IRBuilder<> Builder(LI); - Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked( - Builder, LI->getPointerOperand(), MemOpOrder); - - insertTrailingFence(Builder, LI->getOrdering()); - - LI->replaceAllUsesWith(Val); - LI->eraseFromParent(); - - return true; -} - -bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) { - // The only atomic 64-bit store on ARM is an strexd that succeeds, which means - // we need a loop and the entire instruction is essentially an "atomicrmw - // xchg" that ignores the value loaded. 
- IRBuilder<> Builder(SI); - AtomicRMWInst *AI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), - SI->getValueOperand(), SI->getOrdering()); - SI->eraseFromParent(); - - // Now we have an appropriate swap instruction, lower it as usual. - return expandAtomicRMW(AI); -} - -bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { - AtomicOrdering Order = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // fence? - // atomicrmw.start: - // %loaded = @load.linked(%addr) - // %new = some_op iN %loaded, %incr - // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %atomicrmw.end - // atomicrmw.end: - // fence? - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. 
- Builder.SetInsertPoint(LoopBB); - Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked( - Builder, Addr, MemOpOrder); - - Value *NewVal; - switch (AI->getOperation()) { - case AtomicRMWInst::Xchg: - NewVal = AI->getValOperand(); - break; - case AtomicRMWInst::Add: - NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Sub: - NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::And: - NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Nand: - NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), - "new"); - break; - case AtomicRMWInst::Or: - NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Xor: - NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - default: - llvm_unreachable("Unknown atomic op"); - } - - Value *StoreSuccess = - TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional( - Builder, NewVal, Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( - StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); - Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); - - Builder.SetInsertPoint(ExitBB, 
ExitBB->begin()); - insertTrailingFence(Builder, Order); - - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); - AtomicOrdering FailureOrder = CI->getFailureOrdering(); - Value *Addr = CI->getPointerOperand(); - BasicBlock *BB = CI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord - // - // The full expansion we produce is: - // [...] - // fence? - // cmpxchg.start: - // %loaded = @load.linked(%addr) - // %should_store = icmp eq %loaded, %desired - // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure - // cmpxchg.trystore: - // %stored = @store_conditional(%new, %addr) - // %success = icmp eq i32 %stored, 0 - // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure - // cmpxchg.success: - // fence? - // br label %cmpxchg.end - // cmpxchg.failure: - // fence? - // br label %cmpxchg.end - // cmpxchg.end: - // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] - // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 - // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); - auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); - auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); - auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); - - // This grabs the DebugLoc from CI - IRBuilder<> Builder(CI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. 
- std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked( - Builder, Addr, MemOpOrder); - Value *ShouldStore = - Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); - - // If the the cmpxchg doesn't actually need any ordering when it fails, we can - // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); - - Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = - TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional( - Builder, CI->getNewValOperand(), Addr, MemOpOrder); - StoreSuccess = Builder.CreateICmpEQ( - StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(StoreSuccess, SuccessBB, - CI->isWeak() ? FailureBB : LoopBB); - - // Make sure later instructions don't get reordered with a fence if necessary. - Builder.SetInsertPoint(SuccessBB); - insertTrailingFence(Builder, SuccessOrder); - Builder.CreateBr(ExitBB); - - Builder.SetInsertPoint(FailureBB); - insertTrailingFence(Builder, FailureOrder); - Builder.CreateBr(ExitBB); - - // Finally, we have control-flow based knowledge of whether the cmpxchg - // succeeded or not. We expose this to later passes by converting any - // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. - - // Setup the builder so we can create any PHIs we need. 
- Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); - Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); - Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); - - // Look for any users of the cmpxchg that are just comparing the loaded value - // against the desired one, and replace them with the CFG-derived version. - SmallVector<ExtractValueInst *, 2> PrunedInsts; - for (auto User : CI->users()) { - ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); - if (!EV) - continue; - - assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && - "weird extraction from { iN, i1 }"); - - if (EV->getIndices()[0] == 0) - EV->replaceAllUsesWith(Loaded); - else - EV->replaceAllUsesWith(Success); - - PrunedInsts.push_back(EV); - } - - // We can remove the instructions now we're no longer iterating through them. - for (auto EV : PrunedInsts) - EV->eraseFromParent(); - - if (!CI->use_empty()) { - // Some use of the full struct return that we don't understand has happened, - // so we've got to reconstruct it properly. - Value *Res; - Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); - Res = Builder.CreateInsertValue(Res, Success, 1); - - CI->replaceAllUsesWith(Res); - } - - CI->eraseFromParent(); - return true; -} - -AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()) - return Ord; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - Builder.CreateFence(Release); - - // The exclusive operations don't need any barrier if we're adding separate - // fences. 
- return Monotonic; -} - -void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()) - return; - - if (Ord == Acquire || Ord == AcquireRelease) - Builder.CreateFence(Acquire); - else if (Ord == SequentiallyConsistent) - Builder.CreateFence(SequentiallyConsistent); -} diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp new file mode 100644 index 00000000000..d2ed07775bc --- /dev/null +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -0,0 +1,384 @@ +//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions with +// appropriate (intrinsic-based) ldrex/strex loops. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "atomic-expand" + +namespace { + class AtomicExpand: public FunctionPass { + const TargetMachine *TM; + public: + static char ID; // Pass identification, replacement for typeid + explicit AtomicExpand(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) { + initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + bool expandAtomicInsts(Function &F); + + bool expandAtomicLoad(LoadInst *LI); + bool expandAtomicStore(StoreInst *LI); + bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + + AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + }; +} + +char AtomicExpand::ID = 0; +char &llvm::AtomicExpandID = AtomicExpand::ID; +INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", + "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", + false, false) + +FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { + return new AtomicExpand(TM); +} + +bool AtomicExpand::runOnFunction(Function &F) { + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand()) + return false; + + SmallVector<Instruction *, 1> AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. 
+ for (BasicBlock &BB : F) + for (Instruction &Inst : BB) { + if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) || + (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) || + (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) + AtomicInsts.push_back(&Inst); + } + + bool MadeChange = false; + for (Instruction *Inst : AtomicInsts) { + if (!TM->getSubtargetImpl()->getTargetLowering()->shouldExpandAtomicInIR( + Inst)) + continue; + + if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) + MadeChange |= expandAtomicRMW(AI); + else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst)) + MadeChange |= expandAtomicCmpXchg(CI); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + MadeChange |= expandAtomicLoad(LI); + else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + MadeChange |= expandAtomicStore(SI); + else + llvm_unreachable("Unknown atomic instruction"); + } + + return MadeChange; +} + +bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { + // Load instructions don't actually need a leading fence, even in the + // SequentiallyConsistent case. + AtomicOrdering MemOpOrder = + TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic() + ? Monotonic + : LI->getOrdering(); + + // The only 64-bit load guaranteed to be single-copy atomic by the ARM is + // an ldrexd (A3.5.3). + IRBuilder<> Builder(LI); + Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); + + insertTrailingFence(Builder, LI->getOrdering()); + + LI->replaceAllUsesWith(Val); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicStore(StoreInst *SI) { + // The only atomic 64-bit store on ARM is an strexd that succeeds, which means + // we need a loop and the entire instruction is essentially an "atomicrmw + // xchg" that ignores the value loaded. 
+ IRBuilder<> Builder(SI); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), SI->getOrdering()); + SI->eraseFromParent(); + + // Now we have an appropriate swap instruction, lower it as usual. + return expandAtomicRMW(AI); +} + +bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { + AtomicOrdering Order = AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp i32 ne %stored, 0 + // br i1 %try_again, label %loop, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. 
+ Builder.SetInsertPoint(LoopBB); + Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked( + Builder, Addr, MemOpOrder); + + Value *NewVal; + switch (AI->getOperation()) { + case AtomicRMWInst::Xchg: + NewVal = AI->getValOperand(); + break; + case AtomicRMWInst::Add: + NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Sub: + NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::And: + NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Nand: + NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), + "new"); + break; + case AtomicRMWInst::Or: + NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Xor: + NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + default: + llvm_unreachable("Unknown atomic op"); + } + + Value *StoreSuccess = + TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional( + Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, 
ExitBB->begin()); + insertTrailingFence(Builder, Order); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering FailureOrder = CI->getFailureOrdering(); + Value *Addr = CI->getPointerOperand(); + BasicBlock *BB = CI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord + // + // The full expansion we produce is: + // [...] + // fence? + // cmpxchg.start: + // %loaded = @load.linked(%addr) + // %should_store = icmp eq %loaded, %desired + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.failure + // cmpxchg.trystore: + // %stored = @store_conditional(%new, %addr) + // %success = icmp eq i32 %stored, 0 + // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // cmpxchg.success: + // fence? + // br label %cmpxchg.end + // cmpxchg.failure: + // fence? + // br label %cmpxchg.end + // cmpxchg.end: + // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 + // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); + auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + + // This grabs the DebugLoc from CI + IRBuilder<> Builder(CI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. 
+  std::prev(BB->end())->eraseFromParent();
+  Builder.SetInsertPoint(BB);
+  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
+  Builder.CreateBr(LoopBB);
+
+  // Start the main loop block now that we've taken care of the preliminaries.
+  Builder.SetInsertPoint(LoopBB);
+  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
+      Builder, Addr, MemOpOrder);
+  Value *ShouldStore =
+      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+
+  // If the cmpxchg doesn't actually need any ordering when it fails, we can
+  // jump straight past that fence instruction (if it exists).
+  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+
+  Builder.SetInsertPoint(TryStoreBB);
+  Value *StoreSuccess =
+      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
+          Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+  StoreSuccess = Builder.CreateICmpEQ(
+      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+  Builder.CreateCondBr(StoreSuccess, SuccessBB,
+                       CI->isWeak() ? FailureBB : LoopBB);
+
+  // Make sure later instructions don't get reordered with a fence if necessary.
+  Builder.SetInsertPoint(SuccessBB);
+  insertTrailingFence(Builder, SuccessOrder);
+  Builder.CreateBr(ExitBB);
+
+  Builder.SetInsertPoint(FailureBB);
+  insertTrailingFence(Builder, FailureOrder);
+  Builder.CreateBr(ExitBB);
+
+  // Finally, we have control-flow based knowledge of whether the cmpxchg
+  // succeeded or not. We expose this to later passes by converting any
+  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
+
+  // Setup the builder so we can create any PHIs we need.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + SmallVector<ExtractValueInst *, 2> PrunedInsts; + for (auto User : CI->users()) { + ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); + if (!EV) + continue; + + assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && + "weird extraction from { iN, i1 }"); + + if (EV->getIndices()[0] == 0) + EV->replaceAllUsesWith(Loaded); + else + EV->replaceAllUsesWith(Success); + + PrunedInsts.push_back(EV); + } + + // We can remove the instructions now we're no longer iterating through them. + for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly. + Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); + return true; +} + +AtomicOrdering AtomicExpand::insertLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()) + return Ord; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + Builder.CreateFence(Release); + + // The exclusive operations don't need any barrier if we're adding separate + // fences. 
+ return Monotonic; +} + +void AtomicExpand::insertTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()) + return; + + if (Ord == Acquire || Ord == AcquireRelease) + Builder.CreateFence(Acquire); + else if (Ord == SequentiallyConsistent) + Builder.CreateFence(SequentiallyConsistent); +} diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2a247c12e64..07453d75e81 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -2,7 +2,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp Analysis.cpp - AtomicExpandLoadLinkedPass.cpp + AtomicExpandPass.cpp BasicTargetTransformInfo.cpp BranchFolding.cpp CalcSpillWeights.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 12b0411065a..307dec548fc 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -20,7 +20,7 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { - initializeAtomicExpandLoadLinkedPass(Registry); + initializeAtomicExpandPass(Registry); initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index f3172a62f03..e04fe1b5a97 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -144,7 +144,7 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { void AArch64PassConfig::addIRPasses() { // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg // ourselves. - addPass(createAtomicExpandLoadLinkedPass(TM)); + addPass(createAtomicExpandPass(TM)); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. 
We can exploit existing control-flow in diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 9c4f05283f2..25f2316a555 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -428,7 +428,7 @@ bool ARMSubtarget::enablePostMachineScheduler() const { return (!isThumb() || hasThumb2()); } -bool ARMSubtarget::enableAtomicExpandLoadLinked() const { +bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier() && !isThumb1Only(); } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 1263e8b7121..8b40f2f1932 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -436,8 +436,8 @@ public: /// True for some subtargets at > -O0. bool enablePostMachineScheduler() const override; - // enableAtomicExpandLoadLinked - True if we need to expand our atomics. - bool enableAtomicExpandLoadLinked() const override; + // enableAtomicExpand- True if we need to expand our atomics. + bool enableAtomicExpand() const override; /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index fc164ada35f..3e5840d51b6 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -161,7 +161,7 @@ void ARMPassConfig::addIRPasses() { if (TM->Options.ThreadModel == ThreadModel::Single) addPass(createLowerAtomicPass()); else - addPass(createAtomicExpandLoadLinkedPass(TM)); + addPass(createAtomicExpandPass(TM)); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. 
We can exploit existing control-flow in diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 386a813b057..23c038fff66 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -39,7 +39,7 @@ bool TargetSubtargetInfo::useMachineScheduler() const { return enableMachineScheduler(); } -bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const { +bool TargetSubtargetInfo::enableAtomicExpand() const { return true; } diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll new file mode 100644 index 00000000000..1f40f94057a --- /dev/null +++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll @@ -0,0 +1,364 @@ +; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s + +define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) { +; CHECK-LABEL: @test_atomic_xchg_i8 +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK-NOT: fence +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic + ret i8 %res +} + +define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) { +; CHECK-LABEL: @test_atomic_add_i16 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16 +; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend +; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 
@llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i16 [[OLDVAL]] + %res = atomicrmw add i16* %ptr, i16 %addend seq_cst + ret i16 %res +} + +define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) { +; CHECK-LABEL: @test_atomic_sub_i32 +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr) +; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence acquire +; CHECK: ret i32 [[OLDVAL]] + %res = atomicrmw sub i32* %ptr, i32 %subend acquire + ret i32 %res +} + +define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) { +; CHECK-LABEL: @test_atomic_and_i8 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend +; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK-NOT: fence +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw and i8* %ptr, i8 %andend release + ret i8 %res +} + +define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) { +; CHECK-LABEL: @test_atomic_nand_i16 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16 +; CHECK: 
[[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend +; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1 +; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i16 [[OLDVAL]] + %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst + ret i16 %res +} + +define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) { +; CHECK-LABEL: @test_atomic_or_i64 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]]) +; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 +; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 +; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 +; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64 +; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 +; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] +; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend +; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32 +; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32 +; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i64 [[OLDVAL]] + %res = atomicrmw or i64* %ptr, i64 %orend seq_cst + ret i64 %res +} + +define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) { +; CHECK-LABEL: @test_atomic_xor_i8 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: 
[[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend +; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst + ret i8 %res +} + +define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) { +; CHECK-LABEL: @test_atomic_max_i8 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend +; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend +; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst + ret i8 %res +} + +define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) { +; CHECK-LABEL: @test_atomic_min_i8 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend +; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend +; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label 
%[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw min i8* %ptr, i8 %minend seq_cst + ret i8 %res +} + +define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) { +; CHECK-LABEL: @test_atomic_umax_i8 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend +; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend +; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst + ret i8 %res +} + +define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) { +; CHECK-LABEL: @test_atomic_umin_i8 +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend +; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend +; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK: fence seq_cst +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst + ret i8 %res +} + +define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { +; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst +; CHECK: fence release +; CHECK: br label 
%[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK: fence seq_cst +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK: fence seq_cst +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i8 [[OLDVAL]] + + %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst + %old = extractvalue { i8, i1 } %pairold, 0 + ret i8 %old +} + +define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) { +; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic +; CHECK: fence release +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK: fence seq_cst +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: 
[[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i16 [[OLDVAL]] + + %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic + %old = extractvalue { i16, i1 } %pairold, 0 + ret i16 %old +} + +define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) { +; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr) +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK: fence acquire +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK: fence acquire +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i32 [[OLDVAL]] + + %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire + %old = extractvalue { i32, i1 } %pairold, 0 + ret i32 %old +} + +define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) { +; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]]) +; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 +; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 +; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 +; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64 +; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 +; CHECK: [[OLDVAL:%.*]] = 
or i64 [[LO64]], [[HI64]] +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 +; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 +; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i64 [[OLDVAL]] + + %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic + %old = extractvalue { i64, i1 } %pairold, 0 + ret i64 %old +} diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll new file mode 100644 index 00000000000..42d7b781006 --- /dev/null +++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll @@ -0,0 +1,226 @@ +; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s + +define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) { +; CHECK-LABEL: @test_atomic_xchg_i8 +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 +; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; 
CHECK-NOT: fence +; CHECK: ret i8 [[OLDVAL]] + %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic + ret i8 %res +} + +define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) { +; CHECK-LABEL: @test_atomic_add_i16 +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16 +; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend +; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK-NOT: fence +; CHECK: ret i16 [[OLDVAL]] + %res = atomicrmw add i16* %ptr, i16 %addend seq_cst + ret i16 %res +} + +define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) { +; CHECK-LABEL: @test_atomic_sub_i32 +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr) +; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK-NOT: fence +; CHECK: ret i32 [[OLDVAL]] + %res = atomicrmw sub i32* %ptr, i32 %subend acquire + ret i32 %res +} + +define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) { +; CHECK-LABEL: @test_atomic_or_i64 +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]]) +; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 +; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 +; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 +; CHECK: [[HI64_TMP:%.*]] = zext i32 
[[HI]] to i64 +; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 +; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] +; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend +; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32 +; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32 +; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) +; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: [[END]]: +; CHECK-NOT: fence +; CHECK: ret i64 [[OLDVAL]] + %res = atomicrmw or i64* %ptr, i64 %orend seq_cst + ret i64 %res +} + +define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { +; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK-NOT: fence_cst +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence_cst +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i8 [[OLDVAL]] + + %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst + %old = extractvalue { i8, i1 } %pairold, 0 + ret i8 %old +} + +define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, 
i16 %newval) { +; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr) +; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i16 [[OLDVAL]] + + %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic + %old = extractvalue { i16, i1 } %pairold, 0 + ret i16 %old +} + +define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) { +; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr) +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK-NOT: fence_cst +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence_cst +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: 
[[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i32 [[OLDVAL]] + + %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire + %old = extractvalue { i32, i1 } %pairold, 0 + ret i32 %old +} + +define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) { +; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic +; CHECK-NOT: fence +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]]) +; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 +; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 +; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 +; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64 +; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 +; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 +; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 +; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 +; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* +; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) +; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK-NOT: fence_cst +; CHECK: br label %[[DONE:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence_cst +; CHECK: br label %[[DONE]] + +; CHECK: [[DONE]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i64 [[OLDVAL]] + + %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic + %old = extractvalue { i64, i1 } %pairold, 0 + ret i64 %old +} diff --git 
a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll new file mode 100644 index 00000000000..28e70632455 --- /dev/null +++ b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll @@ -0,0 +1,97 @@ +; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s + +define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { +; CHECK-LABEL: @test_cmpxchg_seq_cst +; CHECK: fence release +; CHECK: br label %[[START:.*]] + +; CHECK: [[START]]: +; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) +; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 +; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK: fence seq_cst +; CHECK: br label %[[END:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK: fence seq_cst +; CHECK: br label %[[END]] + +; CHECK: [[END]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i32 [[LOADED]] + + %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst + %oldval = extractvalue { i32, i1 } %pair, 0 + ret i32 %oldval +} + +define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) { +; CHECK-LABEL: @test_cmpxchg_weak_fail +; CHECK: fence release +; CHECK: br label %[[START:.*]] + +; CHECK: [[START]]: +; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) +; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 +; CHECK: br 
i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK: fence seq_cst +; CHECK: br label %[[END:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[END]] + +; CHECK: [[END]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i1 [[SUCCESS]] + + %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic + %oldval = extractvalue { i32, i1 } %pair, 1 + ret i1 %oldval +} + +define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) { +; CHECK-LABEL: @test_cmpxchg_monotonic +; CHECK-NOT: fence +; CHECK: br label %[[START:.*]] + +; CHECK: [[START]]: +; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) +; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 +; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[END:.*]] + +; CHECK: [[FAILURE_BB]]: +; CHECK-NOT: fence +; CHECK: br label %[[END]] + +; CHECK: [[END]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i32 [[LOADED]] + + %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic + %oldval = extractvalue { i32, i1 } %pair, 0 + ret i32 %oldval +} diff --git a/test/Transforms/AtomicExpand/ARM/lit.local.cfg b/test/Transforms/AtomicExpand/ARM/lit.local.cfg new file mode 100644 index 00000000000..98c6700c209 --- /dev/null +++ b/test/Transforms/AtomicExpand/ARM/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll 
b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll deleted file mode 100644 index 6a93016fc26..00000000000 --- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll +++ /dev/null @@ -1,364 +0,0 @@ -; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-ll-sc %s | FileCheck %s - -define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) { -; CHECK-LABEL: @test_atomic_xchg_i8 -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK-NOT: fence -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic - ret i8 %res -} - -define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) { -; CHECK-LABEL: @test_atomic_add_i16 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16 -; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend -; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i16 [[OLDVAL]] - %res = atomicrmw add i16* %ptr, i16 %addend seq_cst - ret i16 %res -} - -define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) { -; CHECK-LABEL: @test_atomic_sub_i32 -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr) -; CHECK: 
[[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence acquire -; CHECK: ret i32 [[OLDVAL]] - %res = atomicrmw sub i32* %ptr, i32 %subend acquire - ret i32 %res -} - -define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) { -; CHECK-LABEL: @test_atomic_and_i8 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend -; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK-NOT: fence -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw and i8* %ptr, i8 %andend release - ret i8 %res -} - -define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) { -; CHECK-LABEL: @test_atomic_nand_i16 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16 -; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend -; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1 -; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i16 [[OLDVAL]] - %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst - ret i16 %res -} - -define i64 @test_atomic_or_i64(i64* %ptr, i64 
%orend) { -; CHECK-LABEL: @test_atomic_or_i64 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]]) -; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 -; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 -; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 -; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64 -; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 -; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] -; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend -; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32 -; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32 -; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 -; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i64 [[OLDVAL]] - %res = atomicrmw or i64* %ptr, i64 %orend seq_cst - ret i64 %res -} - -define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) { -; CHECK-LABEL: @test_atomic_xor_i8 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend -; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst - ret i8 %res -} - -define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) { -; 
CHECK-LABEL: @test_atomic_max_i8 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend -; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend -; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst - ret i8 %res -} - -define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) { -; CHECK-LABEL: @test_atomic_min_i8 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend -; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend -; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw min i8* %ptr, i8 %minend seq_cst - ret i8 %res -} - -define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) { -; CHECK-LABEL: @test_atomic_umax_i8 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend -; CHECK: [[NEWVAL:%.*]] = select i1 
[[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend -; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst - ret i8 %res -} - -define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) { -; CHECK-LABEL: @test_atomic_umin_i8 -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend -; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend -; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK: fence seq_cst -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst - ret i8 %res -} - -define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { -; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label 
%[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK: fence seq_cst -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK: fence seq_cst -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i8 [[OLDVAL]] - - %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst - %old = extractvalue { i8, i1 } %pairold, 0 - ret i8 %old -} - -define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) { -; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic -; CHECK: fence release -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) -; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK: fence seq_cst -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i16 [[OLDVAL]] - - %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic - %old = extractvalue { i16, i1 } %pairold, 0 - ret i16 %old -} - -define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) { -; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr) -; CHECK: 
[[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) -; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK: fence acquire -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK: fence acquire -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[OLDVAL]] - - %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire - %old = extractvalue { i32, i1 } %pairold, 0 - ret i32 %old -} - -define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) { -; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]]) -; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 -; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 -; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 -; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64 -; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 -; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 -; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 -; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 -; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) -; CHECK: [[TST:%.*]] = icmp 
eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i64 [[OLDVAL]] - - %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic - %old = extractvalue { i64, i1 } %pairold, 0 - ret i64 %old -} \ No newline at end of file diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll deleted file mode 100644 index 8092c1010ff..00000000000 --- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll +++ /dev/null @@ -1,226 +0,0 @@ -; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-ll-sc %s | FileCheck %s - -define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) { -; CHECK-LABEL: @test_atomic_xchg_i8 -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 -; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK-NOT: fence -; CHECK: ret i8 [[OLDVAL]] - %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic - ret i8 %res -} - -define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) { -; CHECK-LABEL: @test_atomic_add_i16 -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16 -; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend -; CHECK: 
[[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK-NOT: fence -; CHECK: ret i16 [[OLDVAL]] - %res = atomicrmw add i16* %ptr, i16 %addend seq_cst - ret i16 %res -} - -define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) { -; CHECK-LABEL: @test_atomic_sub_i32 -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr) -; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK-NOT: fence -; CHECK: ret i32 [[OLDVAL]] - %res = atomicrmw sub i32* %ptr, i32 %subend acquire - ret i32 %res -} - -define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) { -; CHECK-LABEL: @test_atomic_or_i64 -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]]) -; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 -; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 -; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 -; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64 -; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 -; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] -; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend -; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32 -; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32 -; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 -; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* 
[[PTR8]]) -; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] -; CHECK: [[END]]: -; CHECK-NOT: fence -; CHECK: ret i64 [[OLDVAL]] - %res = atomicrmw or i64* %ptr, i64 %orend seq_cst - ret i64 %res -} - -define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { -; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr) -; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK-NOT: fence_cst -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence_cst -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i8 [[OLDVAL]] - - %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst - %old = extractvalue { i8, i1 } %pairold, 0 - ret i8 %old -} - -define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) { -; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[NEWVAL32:%.*]] = zext i16 
%newval to i32 -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr) -; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i16 [[OLDVAL]] - - %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic - %old = extractvalue { i16, i1 } %pairold, 0 - ret i16 %old -} - -define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) { -; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr) -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) -; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK-NOT: fence_cst -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence_cst -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[OLDVAL]] - - %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire - %old = extractvalue { i32, i1 } %pairold, 0 - ret i32 %old -} - -define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) { -; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic -; CHECK-NOT: fence -; CHECK: br label %[[LOOP:.*]] - -; CHECK: [[LOOP]]: -; CHECK: 
[[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]]) -; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 -; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1 -; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64 -; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64 -; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 -; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 -; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 -; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 -; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* -; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) -; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK-NOT: fence_cst -; CHECK: br label %[[DONE:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence_cst -; CHECK: br label %[[DONE]] - -; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i64 [[OLDVAL]] - - %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic - %old = extractvalue { i64, i1 } %pairold, 0 - ret i64 %old -} \ No newline at end of file diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll deleted file mode 100644 index 07a4a7f26e6..00000000000 --- a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: opt -atomic-ll-sc -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s - -define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { -; CHECK-LABEL: @test_cmpxchg_seq_cst -; 
CHECK: fence release -; CHECK: br label %[[START:.*]] - -; CHECK: [[START]]: -; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) -; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 -; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK: fence seq_cst -; CHECK: br label %[[END:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK: fence seq_cst -; CHECK: br label %[[END]] - -; CHECK: [[END]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] - - %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst - %oldval = extractvalue { i32, i1 } %pair, 0 - ret i32 %oldval -} - -define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) { -; CHECK-LABEL: @test_cmpxchg_weak_fail -; CHECK: fence release -; CHECK: br label %[[START:.*]] - -; CHECK: [[START]]: -; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) -; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 -; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK: fence seq_cst -; CHECK: br label %[[END:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[END]] - -; CHECK: [[END]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i1 [[SUCCESS]] - - %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst 
monotonic - %oldval = extractvalue { i32, i1 } %pair, 1 - ret i1 %oldval -} - -define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) { -; CHECK-LABEL: @test_cmpxchg_monotonic -; CHECK-NOT: fence -; CHECK: br label %[[START:.*]] - -; CHECK: [[START]]: -; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) -; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[TRY_STORE]]: -; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) -; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 -; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] - -; CHECK: [[SUCCESS_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[END:.*]] - -; CHECK: [[FAILURE_BB]]: -; CHECK-NOT: fence -; CHECK: br label %[[END]] - -; CHECK: [[END]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] - - %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic - %oldval = extractvalue { i32, i1 } %pair, 0 - ret i32 %oldval -} diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg b/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg deleted file mode 100644 index 98c6700c209..00000000000 --- a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'ARM' in config.root.targets: - config.unsupported = True - diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 9a0c9a42089..abcf6910b28 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -345,7 +345,7 @@ int main(int argc, char **argv) { // For codegen passes, only passes that do IR to IR transformation are // supported. initializeCodeGenPreparePass(Registry); - initializeAtomicExpandLoadLinkedPass(Registry); + initializeAtomicExpandPass(Registry); #ifdef LINK_POLLY_INTO_TOOLS polly::initializePollyPasses(Registry);