X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FAtomicExpandPass.cpp;h=077c52b19a7a3d1907cdd3dcda9eb967ccaf0f51;hb=53395c948459c88257c8ae57b65831fab51e4646;hp=e7998db4a7c1172f10a7b15c3b94b2f031fa7e94;hpb=9cb8e12ce13a21a3eb86af76d93423b6b11d86be;p=oota-llvm.git

diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index e7998db4a7c..077c52b19a7 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,11 +8,18 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or
-// AtomicCmpXchg.
+// target specific instructions which implement the same semantics in a way
+// which better fits the target backend. This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/AtomicExpandUtils.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
@@ -21,10 +28,13 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
 
@@ -46,16 +56,48 @@ namespace {
   private:
     bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                bool IsStore, bool IsLoad);
+    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
     bool tryExpandAtomicLoad(LoadInst *LI);
     bool expandAtomicLoadToLL(LoadInst *LI);
     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
     bool expandAtomicStore(StoreInst *SI);
     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
-    bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
+    bool expandAtomicOpToLLSC(
+        Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+        std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *AI);
     bool simplifyIdempotentRMW(AtomicRMWInst *AI);
   };
+
+
+// If 'LI' is a relaxed load and it is immediately followed by an atomic
+// read-modify-write that has acq_rel (or stronger) ordering, we don't have to
+// do anything, since the RMW serves as a natural barrier.
+void MarkRelaxedLoadBeforeAcqrelRMW(LoadInst* LI) {
+  auto* BB = LI->getParent();
+  auto BBI = LI->getIterator();
+  for (BBI++; BBI != BB->end(); BBI++) {
+    Instruction* CurInst = &*BBI;
+    if (!CurInst) {
+      return;
+    }
+    if (!CurInst->isAtomic()) {
+      continue;
+    }
+    auto* RMW = dyn_cast<AtomicRMWInst>(CurInst);
+    if (!RMW) {
+      return;
+    }
+    if (RMW->getOrdering() == AcquireRelease ||
+        RMW->getOrdering() == SequentiallyConsistent) {
+      LI->setHasSubsequentAcqlRMW(true);
+    }
+  }
+}
+
 }
 
 char AtomicExpand::ID = 0;
@@ -74,12 +116,61 @@ bool AtomicExpand::runOnFunction(Function &F) {
   TLI = TM->getSubtargetImpl(F)->getTargetLowering();
 
   SmallVector<Instruction *, 1> AtomicInsts;
+  SmallVector<LoadInst *, 1> MonotonicLoadInsts;
 
   // Changing control-flow while iterating through it is a bad idea, so gather a
   // list of all atomic instructions before we start.
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
-    if (I->isAtomic())
+    // XXX-update: Change relaxed atomic operations to acquire. This includes
+    // relaxed loads, relaxed atomic RMW & relaxed atomic compare exchange.
+    if (I->isAtomic()) {
+      switch (I->getOpcode()) {
+      case Instruction::AtomicCmpXchg: {
+        // XXX-comment: AtomicCmpXchg in AArch64 will be translated to a
+        // conditional branch that depends on the loaded value anyway, so
+        // we don't need to do anything.
+        /*
+        auto* CmpXchg = dyn_cast<AtomicCmpXchgInst>(&*I);
+        auto SuccOrdering = CmpXchg->getSuccessOrdering();
+        if (SuccOrdering == Monotonic) {
+          CmpXchg->setSuccessOrdering(Acquire);
+        } else if (SuccOrdering == Release) {
+          CmpXchg->setSuccessOrdering(AcquireRelease);
+        }
+        */
+        break;
+      }
+      case Instruction::AtomicRMW: {
+        // XXX-comment: Similar to AtomicCmpXchg. These instructions in
+        // AArch64 will be translated to a loop whose condition depends on the
+        // store status, which further depends on the load value.
+        /*
+        auto* RMW = dyn_cast<AtomicRMWInst>(&*I);
+        if (RMW->getOrdering() == Monotonic) {
+          RMW->setOrdering(Acquire);
+        }
+        */
+        break;
+      }
+      case Instruction::Load: {
+        auto* LI = dyn_cast<LoadInst>(&*I);
+        if (LI->getOrdering() == Monotonic) {
+          /*
+          DEBUG(dbgs() << "Transforming relaxed loads to acquire loads: "
+                       << *LI << '\n');
+          LI->setOrdering(Acquire);
+          */
+//          MonotonicLoadInsts.push_back(LI);
+          MarkRelaxedLoadBeforeAcqrelRMW(LI);
+        }
+        break;
+      }
+      default: {
+        break;
+      }
+      }
       AtomicInsts.push_back(&*I);
+    }
   }
 
   bool MadeChange = false;
@@ -96,7 +187,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
     if (TLI->getInsertFencesForAtomic()) {
       if (LI && isAtLeastAcquire(LI->getOrdering())) {
         FenceOrdering = LI->getOrdering();
-        LI->setOrdering(Monotonic);
+//        AddFakeConditionalBranch(
         IsStore = false;
         IsLoad = true;
       } else if (SI && isAtLeastRelease(SI->getOrdering())) {
@@ -128,9 +219,27 @@ bool AtomicExpand::runOnFunction(Function &F) {
     }
 
     if (LI) {
+      if (LI->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        LI = convertAtomicLoadToIntegerType(LI);
+        assert(LI->getType()->isIntegerTy() && "invariant broken");
+        MadeChange = true;
+      }
+
       MadeChange |= tryExpandAtomicLoad(LI);
-    } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
-      MadeChange |= expandAtomicStore(SI);
+    } else if (SI) {
+      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        SI = convertAtomicStoreToIntegerType(SI);
+        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+               "invariant broken");
+        MadeChange = true;
+      }
+
+      if (TLI->shouldExpandAtomicStoreInIR(SI))
+        MadeChange |= expandAtomicStore(SI);
     } else if (RMWI) {
       // There are two different ways of expanding RMW instructions:
       // - into a load if it is idempotent
@@ -146,6 +255,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
       MadeChange |= expandAtomicCmpXchg(CASI);
     }
   }
+
   return MadeChange;
 }
 
@@ -170,17 +280,55 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
   return (LeadingFence || TrailingFence);
 }
 
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+                                                       const DataLayout &DL) {
+  EVT VT = TLI->getValueType(DL, T);
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+  return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth. See the function comment on
+/// convertAtomicStoreToIntegerType for background.
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+  auto *M = LI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+                                            M->getDataLayout());
+
+  IRBuilder<> Builder(LI);
+
+  Value *Addr = LI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  auto *NewLI = Builder.CreateLoad(NewAddr);
+  NewLI->setAlignment(LI->getAlignment());
+  NewLI->setVolatile(LI->isVolatile());
+  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+  LI->replaceAllUsesWith(NewVal);
+  LI->eraseFromParent();
+  return NewLI;
+}
+
 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
   switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
-  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(
+        LI, LI->getPointerOperand(), LI->getOrdering(),
+        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     return expandAtomicLoadToLL(LI);
-  }
-  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
     return expandAtomicLoadToCmpXchg(LI);
   }
-  }
   llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
 }
 
@@ -192,6 +340,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
   // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val = TLI->emitLoadLinked(Builder, LI->getPointerOperand(),
                                    LI->getOrdering());
+  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
 
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -217,6 +366,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
   return true;
 }
 
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth. We used to not support floating point or vector
+/// atomics in the IR at all. The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of an atomic
+/// float or vector store.  The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+  IRBuilder<> Builder(SI);
+  auto *M = SI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+                                            M->getDataLayout());
+  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+
+  Value *Addr = SI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+  NewSI->setAlignment(SI->getAlignment());
+  NewSI->setVolatile(SI->isVolatile());
+  NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+  SI->eraseFromParent();
+  return NewSI;
+}
+
 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
   // This function is only called on atomic stores that are too large to be
   // atomic if implemented as a native store. So we replace them by an
@@ -245,20 +423,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
   NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
 }
 
-bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
-  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
-  case TargetLoweringBase::AtomicExpansionKind::None:
-    return false;
-  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
-    return expandAtomicRMWToLLSC(AI);
-  }
-  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
-    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
-  }
-  }
-  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
-}
-
 /// Emit IR to implement the given atomicrmw operation on values in registers,
 /// returning the new value.
 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
@@ -296,10 +460,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
   }
 }
 
-bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
-  AtomicOrdering MemOpOrder = AI->getOrdering();
-  Value *Addr = AI->getPointerOperand();
-  BasicBlock *BB = AI->getParent();
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
+                                [&](IRBuilder<> &Builder, Value *Loaded) {
+                                  return performAtomicOp(AI->getOperation(),
+                                                         Builder, Loaded,
+                                                         AI->getValOperand());
+                                });
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+  default:
+    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+  }
+}
+
+bool AtomicExpand::expandAtomicOpToLLSC(
+    Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+    std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
+  BasicBlock *BB = I->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
 
@@ -317,11 +499,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   // atomicrmw.end:
   //     fence?
   //     [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); + BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); + // This grabs the DebugLoc from I. + IRBuilder<> Builder(I); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove @@ -334,8 +516,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(LoopBB); Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + Value *NewVal = PerformOp(Builder, Loaded); Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); @@ -345,8 +526,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); + I->replaceAllUsesWith(Loaded); + I->eraseFromParent(); return true; }
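
A rough sketch (not part of the diff above) of the IR rewrite that the new
convertAtomicLoadToIntegerType/convertAtomicStoreToIntegerType helpers produce
for a hypothetical atomic float load; the value names and the 32-bit float
width are illustrative only:

  ; input IR
  %v = load atomic float, float* %p acquire, align 4

  ; after the float-to-integer coercion
  %1 = bitcast float* %p to i32*
  %2 = load atomic i32, i32* %1 acquire, align 4
  %3 = bitcast i32 %2 to float    ; every former use of %v now uses %3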