From cb148c8541b015fe4db2c9fd7cfccd767ba6db1a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 10 Apr 2015 21:48:25 +0000 Subject: [PATCH] [RewriteStatepointsForGC] Limited support for vectors of pointers This patch adds limited support for inserting explicit relocations when there's a vector of pointers live over the statepoint. This doesn't handle the case where the vector contains a mix of base and non-base pointers; that's future work. The current implementation just scalarizes the vector over the gc.statepoint before doing the explicit rewrite. An alternate approach would be to plumb the vector all the way though the backend lowering, but doing that appears challenging. In particular, the size of the indirect spill slot is currently assumed to be sizeof(pointer) throughout the backend. In practice, this is enough to allow running the SLP and Loop vectorizers before RewriteStatepointsForGC. Differential Revision: http://reviews.llvm.org/D8671 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234647 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/RewriteStatepointsForGC.cpp | 249 ++++++++++++++++-- .../RewriteStatepointsForGC/live-vector.ll | 87 ++++++ 2 files changed, 311 insertions(+), 25 deletions(-) create mode 100644 test/Transforms/RewriteStatepointsForGC/live-vector.ll diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 39dcfe53a15..1645591b2db 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -131,38 +131,63 @@ static bool isGCPointerType(const Type *T) { return false; } -/// Return true if the Value is a gc reference type which is potentially used -/// after the instruction 'loc'. This is only used with the edge reachability -/// liveness code. Note: It is assumed the V dominates loc. -static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT, - LoopInfo *LI) { - if (!isGCPointerType(V.getType())) - return false; - - if (V.use_empty()) - return false; - - // Given assumption that V dominates loc, this may be live - return true; +// Return true if this type is one which a) is a gc pointer or contains a GC +// pointer and b) is of a type this code expects to encounter as a live value. +// (The insertion code will assert that a type which matches (a) and not (b) +// is not encountered.) +static bool isHandledGCPointerType(Type *T) { + // We fully support gc pointers + if (isGCPointerType(T)) + return true; + // We partially support vectors of gc pointers. The code will assert if it + // can't handle something. + if (auto VT = dyn_cast(T)) + if (isGCPointerType(VT->getElementType())) + return true; + return false; } #ifndef NDEBUG -static bool isAggWhichContainsGCPtrType(Type *Ty) { +/// Returns true if this type contains a gc pointer whether we know how to +/// handle that type or not. +static bool containsGCPtrType(Type *Ty) { + if(isGCPointerType(Ty)) + return true; if (VectorType *VT = dyn_cast(Ty)) return isGCPointerType(VT->getScalarType()); if (ArrayType *AT = dyn_cast(Ty)) - return isGCPointerType(AT->getElementType()) || - isAggWhichContainsGCPtrType(AT->getElementType()); + return containsGCPtrType(AT->getElementType()); if (StructType *ST = dyn_cast(Ty)) return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), [](Type *SubType) { - return isGCPointerType(SubType) || - isAggWhichContainsGCPtrType(SubType); + return containsGCPtrType(SubType); }); return false; } + +// Returns true if this is a type which a) is a gc pointer or contains a GC +// pointer and b) is of a type which the code doesn't expect (i.e. first class +// aggregates). Used to trip assertions. +static bool isUnhandledGCPointerType(Type *Ty) { + return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); +} #endif +/// Return true if the Value is a gc reference type which is potentially used +/// after the instruction 'loc'. This is only used with the edge reachability +/// liveness code. Note: It is assumed the V dominates loc. +static bool isLiveGCReferenceAt(Value &V, Instruction *Loc, DominatorTree &DT, + LoopInfo *LI) { + if (!isHandledGCPointerType(V.getType())) + return false; + + if (V.use_empty()) + return false; + + // Given assumption that V dominates loc, this may be live + return true; +} + // Conservatively identifies any definitions which might be live at the // given instruction. The analysis is performed immediately before the // given instruction. Values defined by that instruction are not considered @@ -189,7 +214,7 @@ static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred, // Are there any gc pointer arguments live over this point? This needs to be // special cased since arguments aren't defined in basic blocks. for (Argument &arg : F->args()) { - assert(!isAggWhichContainsGCPtrType(arg.getType()) && + assert(!isUnhandledGCPointerType(arg.getType()) && "support for FCA unimplemented"); if (is_live_gc_reference(arg)) { @@ -233,7 +258,7 @@ static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred, break; } - assert(!isAggWhichContainsGCPtrType(inst.getType()) && + assert(!isUnhandledGCPointerType(inst.getType()) && "support for FCA unimplemented"); if (is_live_gc_reference(inst)) { @@ -299,6 +324,51 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, result.liveset = liveset; } +/// If we can trivially determine that this vector contains only base pointers, +/// return the base instruction. +static Value *findBaseOfVector(Value *I) { + assert(I->getType()->isVectorTy() && + cast(I->getType())->getElementType()->isPointerTy() && + "Illegal to ask for the base pointer of a non-pointer type"); + + // Each case parallels findBaseDefiningValue below, see that code for + // detailed motivation. + + if (isa(I)) + // An incoming argument to the function is a base pointer + return I; + + // We shouldn't see the address of a global as a vector value? + assert(!isa(I) && + "unexpected global variable found in base of vector"); + + // inlining could possibly introduce phi node that contains + // undef if callee has multiple returns + if (isa(I)) + // utterly meaningless, but useful for dealing with partially optimized + // code. + return I; + + // Due to inheritance, this must be _after_ the global variable and undef + // checks + if (Constant *Con = dyn_cast(I)) { + assert(!isa(I) && !isa(I) && + "order of checks wrong!"); + assert(Con->isNullValue() && "null is the only case which makes sense"); + return Con; + } + + if (isa(I)) + return I; + + // Note: This code is currently rather incomplete. We are essentially only + // handling cases where the vector element is trivially a base pointer. We + // need to update the entire base pointer construction algorithm to know how + // to track vector elements and potentially scalarize, but the case which + // would motivate the work hasn't shown up in real workloads yet. + llvm_unreachable("no base found for vector element"); +} + /// Helper function for findBasePointer - Will return a value which either a) /// defines the base pointer for the input or b) blocks the simple search /// (i.e. a PHI or Select of two derived pointers) @@ -306,10 +376,15 @@ static Value *findBaseDefiningValue(Value *I) { assert(I->getType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); - // There are instructions which can never return gc pointer values. Sanity - // check that this is actually true. - assert(!isa(I) && !isa(I) && - !isa(I) && "Vector types are not gc pointers"); + // This case is a bit of a hack - it only handles extracts from vectors which + // trivially contain only base pointers. See note inside the function for + // how to improve this. + if (auto *EEI = dyn_cast(I)) { + Value *VectorOperand = EEI->getVectorOperand(); + Value *VectorBase = findBaseOfVector(VectorOperand); + assert(VectorBase && "extract element not known to be a trivial base"); + return EEI; + } if (isa(I)) // An incoming argument to the function is a base pointer @@ -1650,6 +1725,117 @@ static void addBasesAsLiveValues(StatepointLiveSetTy &liveset, assert(liveset.size() == PointerToBase.size()); } +/// Remove any vector of pointers from the liveset by scalarizing them over the +/// statepoint instruction. Adds the scalarized pieces to the liveset. It +/// would be preferrable to include the vector in the statepoint itself, but +/// the lowering code currently does not handle that. Extending it would be +/// slightly non-trivial since it requires a format change. Given how rare +/// such cases are (for the moment?) scalarizing is an acceptable comprimise. +static void splitVectorValues(Instruction *StatepointInst, + StatepointLiveSetTy& LiveSet, DominatorTree &DT) { + SmallVector ToSplit; + for (Value *V : LiveSet) + if (isa(V->getType())) + ToSplit.push_back(V); + + if (ToSplit.empty()) + return; + + Function &F = *(StatepointInst->getParent()->getParent()); + + DenseMap AllocaMap; + // First is normal return, second is exceptional return (invoke only) + DenseMap> Replacements; + for (Value *V : ToSplit) { + LiveSet.erase(V); + + AllocaInst *Alloca = new AllocaInst(V->getType(), "", + F.getEntryBlock().getFirstNonPHI()); + AllocaMap[V] = Alloca; + + VectorType *VT = cast(V->getType()); + IRBuilder<> Builder(StatepointInst); + SmallVector Elements; + for (unsigned i = 0; i < VT->getNumElements(); i++) + Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i))); + LiveSet.insert(Elements.begin(), Elements.end()); + + auto InsertVectorReform = [&](Instruction *IP) { + Builder.SetInsertPoint(IP); + Builder.SetCurrentDebugLocation(IP->getDebugLoc()); + Value *ResultVec = UndefValue::get(VT); + for (unsigned i = 0; i < VT->getNumElements(); i++) + ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i], + Builder.getInt32(i)); + return ResultVec; + }; + + if (isa(StatepointInst)) { + BasicBlock::iterator Next(StatepointInst); + Next++; + Instruction *IP = &*(Next); + Replacements[V].first = InsertVectorReform(IP); + Replacements[V].second = nullptr; + } else { + InvokeInst *Invoke = cast(StatepointInst); + // We've already normalized - check that we don't have shared destination + // blocks + BasicBlock *NormalDest = Invoke->getNormalDest(); + assert(!isa(NormalDest->begin())); + BasicBlock *UnwindDest = Invoke->getUnwindDest(); + assert(!isa(UnwindDest->begin())); + // Insert insert element sequences in both successors + Instruction *IP = &*(NormalDest->getFirstInsertionPt()); + Replacements[V].first = InsertVectorReform(IP); + IP = &*(UnwindDest->getFirstInsertionPt()); + Replacements[V].second = InsertVectorReform(IP); + } + } + for (Value *V : ToSplit) { + AllocaInst *Alloca = AllocaMap[V]; + + // Capture all users before we start mutating use lists + SmallVector Users; + for (User *U : V->users()) + Users.push_back(cast(U)); + + for (Instruction *I : Users) { + if (auto Phi = dyn_cast(I)) { + for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) + if (V == Phi->getIncomingValue(i)) { + LoadInst *Load = new LoadInst(Alloca, "", + Phi->getIncomingBlock(i)->getTerminator()); + Phi->setIncomingValue(i, Load); + } + } else { + LoadInst *Load = new LoadInst(Alloca, "", I); + I->replaceUsesOfWith(V, Load); + } + } + + // Store the original value and the replacement value into the alloca + StoreInst *Store = new StoreInst(V, Alloca); + if (auto I = dyn_cast(V)) + Store->insertAfter(I); + else + Store->insertAfter(Alloca); + + // Normal return for invoke, or call return + Instruction *Replacement = cast(Replacements[V].first); + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + // Unwind return for invoke only + Replacement = cast_or_null(Replacements[V].second); + if (Replacement) + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + } + + // apply mem2reg to promote alloca to SSA + SmallVector Allocas; + for (Value *V : ToSplit) + Allocas.push_back(AllocaMap[V]); + PromoteMemToReg(Allocas, DT); +} + static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, SmallVectorImpl &toUpdate) { #ifndef NDEBUG @@ -1680,7 +1866,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, SmallVector DeoptValues; for (Use &U : StatepointCS.vm_state_args()) { Value *Arg = cast(&U); - if (isGCPointerType(Arg->getType())) + assert(!isUnhandledGCPointerType(Arg->getType()) && + "support for FCA unimplemented"); + if (isHandledGCPointerType(Arg->getType())) DeoptValues.push_back(Arg); } insertUseHolderAfter(CS, DeoptValues, holders); @@ -1698,6 +1886,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // site. findLiveReferences(F, DT, P, toUpdate, records); + // Do a limited scalarization of any live at safepoint vector values which + // contain pointers. This enables this pass to run after vectorization at + // the cost of some possible performance loss. TODO: it would be nice to + // natively support vectors all the way through the backend so we don't need + // to scalarize here. + for (size_t i = 0; i < records.size(); i++) { + struct PartiallyConstructedSafepointRecord &info = records[i]; + Instruction *statepoint = toUpdate[i].getInstruction(); + splitVectorValues(cast(statepoint), info.liveset, DT); + } + // B) Find the base pointers for each live pointer /* scope for caching */ { // Cache the 'defining value' relation used in the computation and diff --git a/test/Transforms/RewriteStatepointsForGC/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/live-vector.ll new file mode 100644 index 00000000000..555f05e576c --- /dev/null +++ b/test/Transforms/RewriteStatepointsForGC/live-vector.ll @@ -0,0 +1,87 @@ +; Test that we can correctly handle vectors of pointers in statepoint +; rewriting. Currently, we scalarize, but that's an implementation detail. +; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s + +; A non-vector relocation for comparison +define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" { +; CHECK-LABEL: test +; CHECK: gc.statepoint +; CHECK-NEXT: gc.relocate +; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated +entry: + %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0) + ret i64 addrspace(1)* %obj +} + +; A base vector from a argument +define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" { +; CHECK-LABEL: test2 +; CHECK: extractelement +; CHECK-NEXT: extractelement +; CHECK-NEXT: gc.statepoint +; CHECK-NEXT: gc.relocate +; CHECK-NEXT: gc.relocate +; CHECK-NEXT: insertelement +; CHECK-NEXT: insertelement +; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %5 +entry: + %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0) + ret <2 x i64 addrspace(1)*> %obj +} + +; A base vector from a load +define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" { +; CHECK-LABEL: test3 +; CHECK: load +; CHECK-NEXT: extractelement +; CHECK-NEXT: extractelement +; CHECK-NEXT: gc.statepoint +; CHECK-NEXT: gc.relocate +; CHECK-NEXT: gc.relocate +; CHECK-NEXT: insertelement +; CHECK-NEXT: insertelement +; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %5 +entry: + %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr + %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0) + ret <2 x i64 addrspace(1)*> %obj +} + +declare i32 @fake_personality_function() + +; When a statepoint is an invoke rather than a call +define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" { +; CHECK-LABEL: test4 +; CHECK: load +; CHECK-NEXT: extractelement +; CHECK-NEXT: extractelement +; CHECK-NEXT: gc.statepoint +entry: + %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr + invoke i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0) + to label %normal_return unwind label %exceptional_return + +; CHECK-LABEL: normal_return: +; CHECK: gc.relocate +; CHECK-NEXT: gc.relocate +; CHECK-NEXT: insertelement +; CHECK-NEXT: insertelement +; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %6 +normal_return: ; preds = %entry + ret <2 x i64 addrspace(1)*> %obj + +; CHECK-LABEL: exceptional_return: +; CHECK: gc.relocate +; CHECK-NEXT: gc.relocate +; CHECK-NEXT: insertelement +; CHECK-NEXT: insertelement +; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %10 +exceptional_return: ; preds = %entry + %landing_pad4 = landingpad { i8*, i32 } personality i32 ()* @fake_personality_function + cleanup + ret <2 x i64 addrspace(1)*> %obj +} + +declare void @do_safepoint() + +declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...) -- 2.34.1