1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Rewrite an existing set of gc.statepoints such that they make potential
11 // relocations performed by the garbage collector explicit in the IR.
13 //===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

#include <algorithm>
#include <iterator>
#include <string>
43 #define DEBUG_TYPE "rewrite-statepoints-for-gc"
47 // Print tracing output
48 static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
51 // Print the liveset found at the insert location
52 static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
54 static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
56 // Print out the base pointers for debugging
57 static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
60 // Cost threshold measuring when it is profitable to rematerialize value instead
62 static cl::opt<unsigned>
63 RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
67 static bool ClobberNonLive = true;
69 static bool ClobberNonLive = false;
71 static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
72 cl::location(ClobberNonLive),
76 struct RewriteStatepointsForGC : public FunctionPass {
77 static char ID; // Pass identification, replacement for typeid
79 RewriteStatepointsForGC() : FunctionPass(ID) {
80 initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
82 bool runOnFunction(Function &F) override;
84 void getAnalysisUsage(AnalysisUsage &AU) const override {
85 // We add and rewrite a bunch of instructions, but don't really do much
86 // else. We could in theory preserve a lot more analyses here.
87 AU.addRequired<DominatorTreeWrapperPass>();
88 AU.addRequired<TargetTransformInfoWrapperPass>();
93 char RewriteStatepointsForGC::ID = 0;
95 FunctionPass *llvm::createRewriteStatepointsForGCPass() {
96 return new RewriteStatepointsForGC();
99 INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
100 "Make relocations explicit at statepoints", false, false)
101 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
102 INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
103 "Make relocations explicit at statepoints", false, false)
106 struct GCPtrLivenessData {
107 /// Values defined in this block.
108 DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
109 /// Values used in this block (and thus live); does not included values
110 /// killed within this block.
111 DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;
113 /// Values live into this basic block (i.e. used by any
114 /// instruction in this basic block or ones reachable from here)
115 DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;
117 /// Values live out of this basic block (i.e. live into
118 /// any successor block)
119 DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
122 // The type of the internal cache used inside the findBasePointers family
123 // of functions. From the callers perspective, this is an opaque type and
124 // should not be inspected.
126 // In the actual implementation this caches two relations:
127 // - The base relation itself (i.e. this pointer is based on that one)
128 // - The base defining value relation (i.e. before base_phi insertion)
129 // Generally, after the execution of a full findBasePointer call, only the
130 // base relation will remain. Internally, we add a mixture of the two
131 // types, then update all the second type to the first type
132 typedef DenseMap<Value *, Value *> DefiningValueMapTy;
133 typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
134 typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
136 struct PartiallyConstructedSafepointRecord {
137 /// The set of values known to be live accross this safepoint
138 StatepointLiveSetTy liveset;
140 /// Mapping from live pointers to a base-defining-value
141 DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
143 /// The *new* gc.statepoint instruction itself. This produces the token
144 /// that normal path gc.relocates and the gc.result are tied to.
145 Instruction *StatepointToken;
147 /// Instruction to which exceptional gc relocates are attached
148 /// Makes it easier to iterate through them during relocationViaAlloca.
149 Instruction *UnwindToken;
151 /// Record live values we are rematerialized instead of relocating.
152 /// They are not included into 'liveset' field.
153 /// Maps rematerialized copy to it's original value.
154 RematerializedValueMapTy RematerializedValues;
/// Compute the live-in set for every basic block in the function.
/// (Forward declaration; the dataflow implementation appears later in this
/// file.)
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data);

/// Given results from the dataflow liveness computation, find the set of live
/// Values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &out);
167 // TODO: Once we can get to the GCStrategy, this becomes
168 // Optional<bool> isGCManagedPointer(const Value *V) const override {
170 static bool isGCPointerType(const Type *T) {
171 if (const PointerType *PT = dyn_cast<PointerType>(T))
172 // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
173 // GC managed heap. We know that a pointer into this heap needs to be
174 // updated and that no other pointer does.
175 return (1 == PT->getAddressSpace());
179 // Return true if this type is one which a) is a gc pointer or contains a GC
180 // pointer and b) is of a type this code expects to encounter as a live value.
181 // (The insertion code will assert that a type which matches (a) and not (b)
182 // is not encountered.)
183 static bool isHandledGCPointerType(Type *T) {
184 // We fully support gc pointers
185 if (isGCPointerType(T))
187 // We partially support vectors of gc pointers. The code will assert if it
188 // can't handle something.
189 if (auto VT = dyn_cast<VectorType>(T))
190 if (isGCPointerType(VT->getElementType()))
196 /// Returns true if this type contains a gc pointer whether we know how to
197 /// handle that type or not.
198 static bool containsGCPtrType(Type *Ty) {
199 if (isGCPointerType(Ty))
201 if (VectorType *VT = dyn_cast<VectorType>(Ty))
202 return isGCPointerType(VT->getScalarType());
203 if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
204 return containsGCPtrType(AT->getElementType());
205 if (StructType *ST = dyn_cast<StructType>(Ty))
207 ST->subtypes().begin(), ST->subtypes().end(),
208 [](Type *SubType) { return containsGCPtrType(SubType); });
212 // Returns true if this is a type which a) is a gc pointer or contains a GC
213 // pointer and b) is of a type which the code doesn't expect (i.e. first class
214 // aggregates). Used to trip assertions.
215 static bool isUnhandledGCPointerType(Type *Ty) {
216 return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
220 static bool order_by_name(llvm::Value *a, llvm::Value *b) {
221 if (a->hasName() && b->hasName()) {
222 return -1 == a->getName().compare(b->getName());
223 } else if (a->hasName() && !b->hasName()) {
225 } else if (!a->hasName() && b->hasName()) {
228 // Better than nothing, but not stable
233 // Conservatively identifies any definitions which might be live at the
234 // given instruction. The analysis is performed immediately before the
235 // given instruction. Values defined by that instruction are not considered
236 // live. Values used by that instruction are considered live.
237 static void analyzeParsePointLiveness(
238 DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
239 const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
240 Instruction *inst = CS.getInstruction();
242 StatepointLiveSetTy liveset;
243 findLiveSetAtInst(inst, OriginalLivenessData, liveset);
246 // Note: This output is used by several of the test cases
247 // The order of elemtns in a set is not stable, put them in a vec and sort
249 SmallVector<Value *, 64> temp;
250 temp.insert(temp.end(), liveset.begin(), liveset.end());
251 std::sort(temp.begin(), temp.end(), order_by_name);
252 errs() << "Live Variables:\n";
253 for (Value *V : temp) {
254 errs() << " " << V->getName(); // no newline
258 if (PrintLiveSetSize) {
259 errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
260 errs() << "Number live values: " << liveset.size() << "\n";
262 result.liveset = liveset;
265 static Value *findBaseDefiningValue(Value *I);
267 /// If we can trivially determine that the index specified in the given vector
268 /// is a base pointer, return it. In cases where the entire vector is known to
269 /// consist of base pointers, the entire vector will be returned. This
270 /// indicates that the relevant extractelement is a valid base pointer and
271 /// should be used directly.
272 static Value *findBaseOfVector(Value *I, Value *Index) {
273 assert(I->getType()->isVectorTy() &&
274 cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
275 "Illegal to ask for the base pointer of a non-pointer type");
277 // Each case parallels findBaseDefiningValue below, see that code for
278 // detailed motivation.
280 if (isa<Argument>(I))
281 // An incoming argument to the function is a base pointer
284 // We shouldn't see the address of a global as a vector value?
285 assert(!isa<GlobalVariable>(I) &&
286 "unexpected global variable found in base of vector");
288 // inlining could possibly introduce phi node that contains
289 // undef if callee has multiple returns
290 if (isa<UndefValue>(I))
291 // utterly meaningless, but useful for dealing with partially optimized
295 // Due to inheritance, this must be _after_ the global variable and undef
297 if (Constant *Con = dyn_cast<Constant>(I)) {
298 assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
299 "order of checks wrong!");
300 assert(Con->isNullValue() && "null is the only case which makes sense");
304 if (isa<LoadInst>(I))
307 // For an insert element, we might be able to look through it if we know
308 // something about the indexes, but if the indices are arbitrary values, we
309 // can't without much more extensive scalarization.
310 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
311 Value *InsertIndex = IEI->getOperand(2);
312 // This index is inserting the value, look for it's base
313 if (InsertIndex == Index)
314 return findBaseDefiningValue(IEI->getOperand(1));
315 // Both constant, and can't be equal per above. This insert is definitely
316 // not relevant, look back at the rest of the vector and keep trying.
317 if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
318 return findBaseOfVector(IEI->getOperand(0), Index);
321 // Note: This code is currently rather incomplete. We are essentially only
322 // handling cases where the vector element is trivially a base pointer. We
323 // need to update the entire base pointer construction algorithm to know how
324 // to track vector elements and potentially scalarize, but the case which
325 // would motivate the work hasn't shown up in real workloads yet.
326 llvm_unreachable("no base found for vector element");
329 /// Helper function for findBasePointer - Will return a value which either a)
330 /// defines the base pointer for the input or b) blocks the simple search
331 /// (i.e. a PHI or Select of two derived pointers)
332 static Value *findBaseDefiningValue(Value *I) {
333 assert(I->getType()->isPointerTy() &&
334 "Illegal to ask for the base pointer of a non-pointer type");
336 // This case is a bit of a hack - it only handles extracts from vectors which
337 // trivially contain only base pointers or cases where we can directly match
338 // the index of the original extract element to an insertion into the vector.
339 // See note inside the function for how to improve this.
340 if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
341 Value *VectorOperand = EEI->getVectorOperand();
342 Value *Index = EEI->getIndexOperand();
343 Value *VectorBase = findBaseOfVector(VectorOperand, Index);
344 // If the result returned is a vector, we know the entire vector must
345 // contain base pointers. In that case, the extractelement is a valid base
347 if (VectorBase->getType()->isVectorTy())
349 // Otherwise, we needed to look through the vector to find the base for
350 // this particular element.
351 assert(VectorBase->getType()->isPointerTy());
355 if (isa<Argument>(I))
356 // An incoming argument to the function is a base pointer
357 // We should have never reached here if this argument isn't an gc value
360 if (isa<GlobalVariable>(I))
364 // inlining could possibly introduce phi node that contains
365 // undef if callee has multiple returns
366 if (isa<UndefValue>(I))
367 // utterly meaningless, but useful for dealing with
368 // partially optimized code.
371 // Due to inheritance, this must be _after_ the global variable and undef
373 if (Constant *Con = dyn_cast<Constant>(I)) {
374 assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
375 "order of checks wrong!");
376 // Note: Finding a constant base for something marked for relocation
377 // doesn't really make sense. The most likely case is either a) some
378 // screwed up the address space usage or b) your validating against
379 // compiled C++ code w/o the proper separation. The only real exception
380 // is a null pointer. You could have generic code written to index of
381 // off a potentially null value and have proven it null. We also use
382 // null pointers in dead paths of relocation phis (which we might later
383 // want to find a base pointer for).
384 assert(isa<ConstantPointerNull>(Con) &&
385 "null is the only case which makes sense");
389 if (CastInst *CI = dyn_cast<CastInst>(I)) {
390 Value *Def = CI->stripPointerCasts();
391 // If we find a cast instruction here, it means we've found a cast which is
392 // not simply a pointer cast (i.e. an inttoptr). We don't know how to
393 // handle int->ptr conversion.
394 assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
395 return findBaseDefiningValue(Def);
398 if (isa<LoadInst>(I))
399 return I; // The value loaded is an gc base itself
401 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
402 // The base of this GEP is the base
403 return findBaseDefiningValue(GEP->getPointerOperand());
405 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
406 switch (II->getIntrinsicID()) {
407 case Intrinsic::experimental_gc_result_ptr:
409 // fall through to general call handling
411 case Intrinsic::experimental_gc_statepoint:
412 case Intrinsic::experimental_gc_result_float:
413 case Intrinsic::experimental_gc_result_int:
414 llvm_unreachable("these don't produce pointers");
415 case Intrinsic::experimental_gc_relocate: {
416 // Rerunning safepoint insertion after safepoints are already
417 // inserted is not supported. It could probably be made to work,
418 // but why are you doing this? There's no good reason.
419 llvm_unreachable("repeat safepoint insertion is not supported");
421 case Intrinsic::gcroot:
422 // Currently, this mechanism hasn't been extended to work with gcroot.
423 // There's no reason it couldn't be, but I haven't thought about the
424 // implications much.
426 "interaction with the gcroot mechanism is not supported");
429 // We assume that functions in the source language only return base
430 // pointers. This should probably be generalized via attributes to support
431 // both source language and internal functions.
432 if (isa<CallInst>(I) || isa<InvokeInst>(I))
435 // I have absolutely no idea how to implement this part yet. It's not
436 // neccessarily hard, I just haven't really looked at it yet.
437 assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
439 if (isa<AtomicCmpXchgInst>(I))
440 // A CAS is effectively a atomic store and load combined under a
441 // predicate. From the perspective of base pointers, we just treat it
445 assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
446 "binary ops which don't apply to pointers");
448 // The aggregate ops. Aggregates can either be in the heap or on the
449 // stack, but in either case, this is simply a field load. As a result,
450 // this is a defining definition of the base just like a load is.
451 if (isa<ExtractValueInst>(I))
454 // We should never see an insert vector since that would require we be
455 // tracing back a struct value not a pointer value.
456 assert(!isa<InsertValueInst>(I) &&
457 "Base pointer for a struct is meaningless");
459 // The last two cases here don't return a base pointer. Instead, they
460 // return a value which dynamically selects from amoung several base
461 // derived pointers (each with it's own base potentially). It's the job of
462 // the caller to resolve these.
463 assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
464 "missing instruction case in findBaseDefiningValing");
468 /// Returns the base defining value for this value.
469 static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
470 Value *&Cached = Cache[I];
472 Cached = findBaseDefiningValue(I);
474 assert(Cache[I] != nullptr);
477 dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
483 /// Return a base pointer for this value if known. Otherwise, return it's
484 /// base defining value.
485 static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
486 Value *Def = findBaseDefiningValueCached(I, Cache);
487 auto Found = Cache.find(Def);
488 if (Found != Cache.end()) {
489 // Either a base-of relation, or a self reference. Caller must check.
490 return Found->second;
492 // Only a BDV available
496 /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
497 /// is it known to be a base pointer? Or do we need to continue searching.
498 static bool isKnownBaseResult(Value *V) {
499 if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
500 // no recursion possible
503 if (isa<Instruction>(V) &&
504 cast<Instruction>(V)->getMetadata("is_base_value")) {
505 // This is a previously inserted base phi or select. We know
506 // that this is a base value.
510 // We need to keep searching
514 // TODO: find a better name for this
518 enum Status { Unknown, Base, Conflict };
520 PhiState(Status s, Value *b = nullptr) : status(s), base(b) {
521 assert(status != Base || b);
523 PhiState(Value *b) : status(Base), base(b) {}
524 PhiState() : status(Unknown), base(nullptr) {}
526 Status getStatus() const { return status; }
527 Value *getBase() const { return base; }
529 bool isBase() const { return getStatus() == Base; }
530 bool isUnknown() const { return getStatus() == Unknown; }
531 bool isConflict() const { return getStatus() == Conflict; }
533 bool operator==(const PhiState &other) const {
534 return base == other.base && status == other.status;
537 bool operator!=(const PhiState &other) const { return !(*this == other); }
540 errs() << status << " (" << base << " - "
541 << (base ? base->getName() : "nullptr") << "): ";
546 Value *base; // non null only if status == base
549 typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
550 // Values of type PhiState form a lattice, and this is a helper
551 // class that implementes the meet operation. The meat of the meet
552 // operation is implemented in MeetPhiStates::pureMeet
553 class MeetPhiStates {
555 // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
556 explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
557 : phiStates(phiStates) {}
559 // Destructively meet the current result with the base V. V can
560 // either be a merge instruction (SelectInst / PHINode), in which
561 // case its status is looked up in the phiStates map; or a regular
562 // SSA value, in which case it is assumed to be a base.
563 void meetWith(Value *V) {
564 PhiState otherState = getStateForBDV(V);
565 assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
566 MeetPhiStates::pureMeet(currentResult, otherState)) &&
567 "math is wrong: meet does not commute!");
568 currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
571 PhiState getResult() const { return currentResult; }
574 const ConflictStateMapTy &phiStates;
575 PhiState currentResult;
577 /// Return a phi state for a base defining value. We'll generate a new
578 /// base state for known bases and expect to find a cached state otherwise
579 PhiState getStateForBDV(Value *baseValue) {
580 if (isKnownBaseResult(baseValue)) {
581 return PhiState(baseValue);
583 return lookupFromMap(baseValue);
587 PhiState lookupFromMap(Value *V) {
588 auto I = phiStates.find(V);
589 assert(I != phiStates.end() && "lookup failed!");
593 static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) {
594 switch (stateA.getStatus()) {
595 case PhiState::Unknown:
599 assert(stateA.getBase() && "can't be null");
600 if (stateB.isUnknown())
603 if (stateB.isBase()) {
604 if (stateA.getBase() == stateB.getBase()) {
605 assert(stateA == stateB && "equality broken!");
608 return PhiState(PhiState::Conflict);
610 assert(stateB.isConflict() && "only three states!");
611 return PhiState(PhiState::Conflict);
613 case PhiState::Conflict:
616 llvm_unreachable("only three states!");
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
// NOTE(review): this listing appears truncated — several statements and
// closing braces (e.g. the early return after the isKnownBaseResult check,
// loop braces, the final return of cache[def]) are missing. Verify against
// the upstream file before building.
static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
  // Fast path: the base-defining-value is already a known base.
  Value *def = findBaseOrBDV(I, cache);
  if (isKnownBaseResult(def)) {
  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state.  Use an
  //   optimistic algorithm to propagate base pointer information.  Lattice
  // When algorithm terminates, all PHIs will either have a single concrete
  // base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments.  Add
  //   these to the base[Instruction] = BasePtr mapping.  For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm.  This would be
  // analougous to pessimistic data flow and would likely lead to an
  // overall worse solution.

  // Phase 1: seed the lattice and discover the phi/select web reachable
  // from the initial base-defining value.
  ConflictStateMapTy states;
  states[def] = PhiState();
  // Recursively fill in all phis & selects reachable from the initial one
  // for which we don't already know a definite base value for
  // TODO: This should be rewritten with a worklist
    // Since we're adding elements to 'states' as we run, we can't keep
    // iterators into the set.
    SmallVector<Value *, 16> Keys;
    Keys.reserve(states.size());
    for (auto Pair : states) {
      Value *V = Pair.first;
    for (Value *v : Keys) {
      assert(!isKnownBaseResult(v) && "why did it get added?");
      if (PHINode *phi = dyn_cast<PHINode>(v)) {
        assert(phi->getNumIncomingValues() > 0 &&
               "zero input phis are illegal");
        for (Value *InVal : phi->incoming_values()) {
          Value *local = findBaseOrBDV(InVal, cache);
          if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
            states[local] = PhiState();
      } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
        Value *local = findBaseOrBDV(sel->getTrueValue(), cache);
        if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
          states[local] = PhiState();
        local = findBaseOrBDV(sel->getFalseValue(), cache);
        if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
          states[local] = PhiState();
    // Debug dump of the seeded lattice (guarded by TraceLSP upstream).
    errs() << "States after initialization:\n";
    for (auto Pair : states) {
      Instruction *v = cast<Instruction>(Pair.first);
      PhiState state = Pair.second;
  // TODO: come back and revisit the state transitions around inputs which
  // have reached conflict state. The current version seems too conservative.

  // Phase 2: iterate the meet operation until the lattice reaches a fixed
  // point; each pass folds every node's inputs through MeetPhiStates.
  bool progress = true;
    size_t oldSize = states.size();
    // We're only changing keys in this loop, thus safe to keep iterators
    for (auto Pair : states) {
      MeetPhiStates calculateMeet(states);
      Value *v = Pair.first;
      assert(!isKnownBaseResult(v) && "why did it get added?");
      if (SelectInst *select = dyn_cast<SelectInst>(v)) {
        calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache));
        calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache));
        for (Value *Val : cast<PHINode>(v)->incoming_values())
          calculateMeet.meetWith(findBaseOrBDV(Val, cache));
      PhiState oldState = states[v];
      PhiState newState = calculateMeet.getResult();
      if (oldState != newState) {
        states[v] = newState;
    assert(oldSize <= states.size());
    assert(oldSize == states.size() || progress);
    errs() << "States after meet iteration:\n";
    for (auto Pair : states) {
      Instruction *v = cast<Instruction>(Pair.first);
      PhiState state = Pair.second;

  // Phase 3: materialize a placeholder base phi/select for every node the
  // meet left in Conflict state.
  // Insert Phis for all conflicts
  // We want to keep naming deterministic in the loop that follows, so
  // sort the keys before iteration.  This is useful in allowing us to
  // write stable tests. Note that there is no invalidation issue here.
  SmallVector<Value *, 16> Keys;
  Keys.reserve(states.size());
  for (auto Pair : states) {
    Value *V = Pair.first;
  std::sort(Keys.begin(), Keys.end(), order_by_name);
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (Value *V : Keys) {
    Instruction *v = cast<Instruction>(V);
    PhiState state = states[V];
    assert(!isKnownBaseResult(v) && "why did it get added?");
    assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!state.isConflict())
    if (isa<PHINode>(v)) {
        std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
      assert(num_preds > 0 && "how did we reach here");
      PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
      // Add metadata marking this as a base value
      auto *const_1 = ConstantInt::get(
              v->getParent()->getParent()->getParent()->getContext()),
      auto MDConst = ConstantAsMetadata::get(const_1);
      MDNode *md = MDNode::get(
          v->getParent()->getParent()->getParent()->getContext(), MDConst);
      phi->setMetadata("is_base_value", md);
      states[v] = PhiState(PhiState::Conflict, phi);
      SelectInst *sel = cast<SelectInst>(v);
      // The undef will be replaced later
      UndefValue *undef = UndefValue::get(sel->getType());
      SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
                                               undef, "base_select", sel);
      // Add metadata marking this as a base value
      auto *const_1 = ConstantInt::get(
              v->getParent()->getParent()->getParent()->getContext()),
      auto MDConst = ConstantAsMetadata::get(const_1);
      MDNode *md = MDNode::get(
          v->getParent()->getParent()->getParent()->getContext(), MDConst);
      basesel->setMetadata("is_base_value", md);
      states[v] = PhiState(PhiState::Conflict, basesel);

  // Phase 4: wire up the operands of the placeholder phis/selects created
  // above, inserting bitcasts where the base type differs.
  // Fixup all the inputs of the new PHIs
  for (auto Pair : states) {
    Instruction *v = cast<Instruction>(Pair.first);
    PhiState state = Pair.second;

    assert(!isKnownBaseResult(v) && "why did it get added?");
    assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!state.isConflict())
    if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
      PHINode *phi = cast<PHINode>(v);
      unsigned NumPHIValues = phi->getNumIncomingValues();
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = phi->getIncomingValue(i);
        BasicBlock *InBB = phi->getIncomingBlock(i);

        // If we've already seen InBB, add the same incoming value
        // we added for it earlier.  The IR verifier requires phi
        // nodes with multiple entries from the same basic block
        // to have the same incoming value for each of those
        // entries.  If we don't do this check here and basephi
        // has a different type than base, we'll end up adding two
        // bitcasts (and hence two distinct values) as incoming
        // values for the same basic block.

        int blockIndex = basephi->getBasicBlockIndex(InBB);
        if (blockIndex != -1) {
          Value *oldBase = basephi->getIncomingValue(blockIndex);
          basephi->addIncoming(oldBase, InBB);
          Value *base = findBaseOrBDV(InVal, cache);
          if (!isKnownBaseResult(base)) {
            // Either conflict or base.
            assert(states.count(base));
            base = states[base].getBase();
            assert(base != nullptr && "unknown PhiState!");
          // In essense this assert states: the only way two
          // values incoming from the same basic block may be
          // different is by being different bitcasts of the same
          // value.  A cleanup that remains TODO is changing
          // findBaseOrBDV to return an llvm::Value of the correct
          // type (and still remain pure).  This will remove the
          // need to add bitcasts.
          assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
                 "sanity -- findBaseOrBDV should be pure!");
        // Find either the defining value for the PHI or the normal base for
        Value *base = findBaseOrBDV(InVal, cache);
        if (!isKnownBaseResult(base)) {
          // Either conflict or base.
          assert(states.count(base));
          base = states[base].getBase();
          assert(base != nullptr && "unknown PhiState!");
        assert(base && "can't be null");
        // Must use original input BB since base may not be Instruction
        // The cast is needed since base traversal may strip away bitcasts
        if (base->getType() != basephi->getType()) {
          base = new BitCastInst(base, basephi->getType(), "cast",
                                 InBB->getTerminator());
        basephi->addIncoming(base, InBB);
      assert(basephi->getNumIncomingValues() == NumPHIValues);
      SelectInst *basesel = cast<SelectInst>(state.getBase());
      SelectInst *sel = cast<SelectInst>(v);
      // Operand 1 & 2 are true, false path respectively. TODO: refactor to
      // something more safe and less hacky.
      for (int i = 1; i <= 2; i++) {
        Value *InVal = sel->getOperand(i);
        // Find either the defining value for the PHI or the normal base for
        Value *base = findBaseOrBDV(InVal, cache);
        if (!isKnownBaseResult(base)) {
          // Either conflict or base.
          assert(states.count(base));
          base = states[base].getBase();
          assert(base != nullptr && "unknown PhiState!");
        assert(base && "can't be null");
        // Must use original input BB since base may not be Instruction
        // The cast is needed since base traversal may strip away bitcasts
        if (base->getType() != basesel->getType()) {
          base = new BitCastInst(base, basesel->getType(), "cast", basesel);
        basesel->setOperand(i, base);

  // Phase 5: publish the computed base relation back into the shared cache.
  // Cache all of our results so we can cheaply reuse them
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation!  FIXME
  for (auto item : states) {
    Value *v = item.first;
    Value *base = item.second.getBase();

    assert(!isKnownBaseResult(v) && "why did it get added?");
      std::string fromstr =
          cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
      errs() << "Updating base value cache"
             << " for: " << (v->hasName() ? v->getName() : "")
             << " from: " << fromstr
             << " to: " << (base->hasName() ? base->getName() : "") << "\n";
    assert(isKnownBaseResult(base) &&
           "must be something we 'know' is a base pointer");
    if (cache.count(v)) {
      // Once we transition from the BDV relation being store in the cache to
      // the base relation being stored, it must be stable
      assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
             "base relation should be stable");
  assert(cache.find(def) != cache.end());
934 // For a set of live pointers (base and/or derived), identify the base
935 // pointer of the object which they are derived from. This routine will
936 // mutate the IR graph as needed to make the 'base' pointer live at the
937 // definition site of 'derived'. This ensures that any use of 'derived' can
938 // also use 'base'. This may involve the insertion of a number of
939 // additional PHI nodes.
941 // preconditions: live is a set of pointer type Values
943 // side effects: may insert PHI nodes into the existing CFG, will preserve
944 // CFG, will not remove or mutate any existing nodes
946 // post condition: PointerToBase contains one (derived, base) pair for every
947 // pointer in live. Note that derived can be equal to base if the original
948 // pointer was a base pointer.
950 findBasePointers(const StatepointLiveSetTy &live,
951 DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
952 DominatorTree *DT, DefiningValueMapTy &DVCache) {
953 // For the naming of values inserted to be deterministic - which makes for
954 // much cleaner and more stable tests - we need to assign an order to the
955 // live values. DenseSets do not provide a deterministic order across runs.
956 SmallVector<Value *, 64> Temp;
957 Temp.insert(Temp.end(), live.begin(), live.end());
958 std::sort(Temp.begin(), Temp.end(), order_by_name);
959 for (Value *ptr : Temp) {
960 Value *base = findBasePointer(ptr, DVCache);
961 assert(base && "failed to find base pointer");
962 PointerToBase[ptr] = base;
963 assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
964 DT->dominates(cast<Instruction>(base)->getParent(),
965 cast<Instruction>(ptr)->getParent())) &&
966 "The base we found better dominate the derived pointer");
968 // If you see this trip and like to live really dangerously, the code should
969 // be correct, just with idioms the verifier can't handle. You can try
970 // disabling the verifier at your own substaintial risk.
971 assert(!isa<ConstantPointerNull>(base) &&
972 "the relocation code needs adjustment to handle the relocation of "
973 "a null pointer constant without causing false positives in the "
974 "safepoint ir verifier.");
978 /// Find the required base pointers (and adjust the live set) for the given
980 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
982 PartiallyConstructedSafepointRecord &result) {
983 DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
984 findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
986 if (PrintBasePointers) {
987 // Note: Need to print these in a stable order since this is checked in
989 errs() << "Base Pairs (w/o Relocation):\n";
990 SmallVector<Value *, 64> Temp;
991 Temp.reserve(PointerToBase.size());
992 for (auto Pair : PointerToBase) {
993 Temp.push_back(Pair.first);
995 std::sort(Temp.begin(), Temp.end(), order_by_name);
996 for (Value *Ptr : Temp) {
997 Value *Base = PointerToBase[Ptr];
998 errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
1003 result.PointerToBase = PointerToBase;
1006 /// Given an updated version of the dataflow liveness results, update the
1007 /// liveset and base pointer maps for the call site CS.
1008 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
1010 PartiallyConstructedSafepointRecord &result);
1012 static void recomputeLiveInValues(
1013 Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
1014 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1015 // TODO-PERF: reuse the original liveness, then simply run the dataflow
1016 // again. The old values are still live and will help it stablize quickly.
1017 GCPtrLivenessData RevisedLivenessData;
1018 computeLiveInValues(DT, F, RevisedLivenessData);
1019 for (size_t i = 0; i < records.size(); i++) {
1020 struct PartiallyConstructedSafepointRecord &info = records[i];
1021 const CallSite &CS = toUpdate[i];
1022 recomputeLiveInValues(RevisedLivenessData, CS, info);
1026 // When inserting gc.relocate calls, we need to ensure there are no uses
1027 // of the original value between the gc.statepoint and the gc.relocate call.
1028 // One case which can arise is a phi node starting one of the successor blocks.
1029 // We also need to be able to insert the gc.relocates only on the path which
1030 // goes through the statepoint. We might need to split an edge to make this
1031 // possible.
1033 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) {
1034 DominatorTree *DT = nullptr;
1035 if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
1036 DT = &DTP->getDomTree();
1038 BasicBlock *Ret = BB;
1039 if (!BB->getUniquePredecessor()) {
1040 Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT);
1043 // Now that 'ret' has unique predecessor we can safely remove all phi nodes
1045 FoldSingleEntryPHINodes(Ret);
1046 assert(!isa<PHINode>(Ret->begin()));
1048 // At this point, we can safely insert a gc.relocate as the first instruction
1049 // in Ret if needed.
1053 static int find_index(ArrayRef<Value *> livevec, Value *val) {
1054 auto itr = std::find(livevec.begin(), livevec.end(), val);
1055 assert(livevec.end() != itr);
1056 size_t index = std::distance(livevec.begin(), itr);
1057 assert(index < livevec.size());
1061 // Create new attribute set containing only attributes which can be transfered
1062 // from original call to the safepoint.
1063 static AttributeSet legalizeCallAttributes(AttributeSet AS) {
1066 for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
1067 unsigned index = AS.getSlotIndex(Slot);
1069 if (index == AttributeSet::ReturnIndex ||
1070 index == AttributeSet::FunctionIndex) {
1072 for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end;
1074 Attribute attr = *it;
1076 // Do not allow certain attributes - just skip them
1077 // Safepoint can not be read only or read none.
1078 if (attr.hasAttribute(Attribute::ReadNone) ||
1079 attr.hasAttribute(Attribute::ReadOnly))
1082 ret = ret.addAttributes(
1083 AS.getContext(), index,
1084 AttributeSet::get(AS.getContext(), index, AttrBuilder(attr)));
1088 // Just skip parameter attributes for now
1094 /// Helper function to place all gc relocates necessary for the given
1097 /// liveVariables - list of variables to be relocated.
1098 /// liveStart - index of the first live variable.
1099 /// basePtrs - base pointers.
1100 /// statepointToken - statepoint instruction to which relocates should be
1102 /// Builder - Llvm IR builder to be used to construct new calls.
1103 static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables,
1104 const int LiveStart,
1105 ArrayRef<llvm::Value *> BasePtrs,
1106 Instruction *StatepointToken,
1107 IRBuilder<> Builder) {
1108 SmallVector<Instruction *, 64> NewDefs;
1109 NewDefs.reserve(LiveVariables.size());
1111 Module *M = StatepointToken->getParent()->getParent()->getParent();
1113 for (unsigned i = 0; i < LiveVariables.size(); i++) {
1114 // We generate a (potentially) unique declaration for every pointer type
1115 // combination. This results is some blow up the function declarations in
1116 // the IR, but removes the need for argument bitcasts which shrinks the IR
1117 // greatly and makes it much more readable.
1118 SmallVector<Type *, 1> Types; // one per 'any' type
1119 // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid
1120 // cases where the actual value's type mangling is not supported by llvm. A
1121 // bitcast is added later to convert gc_relocate to the actual value's type.
1122 Types.push_back(Type::getInt8PtrTy(M->getContext(), 1));
1123 Value *GCRelocateDecl = Intrinsic::getDeclaration(
1124 M, Intrinsic::experimental_gc_relocate, Types);
1126 // Generate the gc.relocate call and save the result
1128 ConstantInt::get(Type::getInt32Ty(M->getContext()),
1129 LiveStart + find_index(LiveVariables, BasePtrs[i]));
1130 Value *LiveIdx = ConstantInt::get(
1131 Type::getInt32Ty(M->getContext()),
1132 LiveStart + find_index(LiveVariables, LiveVariables[i]));
1134 // only specify a debug name if we can give a useful one
1135 Value *Reloc = Builder.CreateCall(
1136 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1137 LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated"
1139 // Trick CodeGen into thinking there are lots of free registers at this
1141 cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold);
1143 NewDefs.push_back(cast<Instruction>(Reloc));
1145 assert(NewDefs.size() == LiveVariables.size() &&
1146 "missing or extra redefinition at safepoint");
1150 makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
1151 const SmallVectorImpl<llvm::Value *> &basePtrs,
1152 const SmallVectorImpl<llvm::Value *> &liveVariables,
1154 PartiallyConstructedSafepointRecord &result) {
1155 assert(basePtrs.size() == liveVariables.size());
1156 assert(isStatepoint(CS) &&
1157 "This method expects to be rewriting a statepoint");
1159 BasicBlock *BB = CS.getInstruction()->getParent();
1161 Function *F = BB->getParent();
1162 assert(F && "must be set");
1163 Module *M = F->getParent();
1165 assert(M && "must be set");
1167 // We're not changing the function signature of the statepoint since the gc
1168 // arguments go into the var args section.
1169 Function *gc_statepoint_decl = CS.getCalledFunction();
1171 // Then go ahead and use the builder do actually do the inserts. We insert
1172 // immediately before the previous instruction under the assumption that all
1173 // arguments will be available here. We can't insert afterwards since we may
1174 // be replacing a terminator.
1175 Instruction *insertBefore = CS.getInstruction();
1176 IRBuilder<> Builder(insertBefore);
1177 // Copy all of the arguments from the original statepoint - this includes the
1178 // target, call args, and deopt args
1179 SmallVector<llvm::Value *, 64> args;
1180 args.insert(args.end(), CS.arg_begin(), CS.arg_end());
1181 // TODO: Clear the 'needs rewrite' flag
1183 // add all the pointers to be relocated (gc arguments)
1184 // Capture the start of the live variable list for use in the gc_relocates
1185 const int live_start = args.size();
1186 args.insert(args.end(), liveVariables.begin(), liveVariables.end());
1188 // Create the statepoint given all the arguments
1189 Instruction *token = nullptr;
1190 AttributeSet return_attributes;
1192 CallInst *toReplace = cast<CallInst>(CS.getInstruction());
1194 Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
1195 call->setTailCall(toReplace->isTailCall());
1196 call->setCallingConv(toReplace->getCallingConv());
1198 // Currently we will fail on parameter attributes and on certain
1199 // function attributes.
1200 AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
1201 // In case if we can handle this set of sttributes - set up function attrs
1202 // directly on statepoint and return attrs later for gc_result intrinsic.
1203 call->setAttributes(new_attrs.getFnAttributes());
1204 return_attributes = new_attrs.getRetAttributes();
1208 // Put the following gc_result and gc_relocate calls immediately after the
1209 // the old call (which we're about to delete)
1210 BasicBlock::iterator next(toReplace);
1211 assert(BB->end() != next && "not a terminator, must have next");
1213 Instruction *IP = &*(next);
1214 Builder.SetInsertPoint(IP);
1215 Builder.SetCurrentDebugLocation(IP->getDebugLoc());
1218 InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());
1220 // Insert the new invoke into the old block. We'll remove the old one in a
1221 // moment at which point this will become the new terminator for the
1223 InvokeInst *invoke = InvokeInst::Create(
1224 gc_statepoint_decl, toReplace->getNormalDest(),
1225 toReplace->getUnwindDest(), args, "", toReplace->getParent());
1226 invoke->setCallingConv(toReplace->getCallingConv());
1228 // Currently we will fail on parameter attributes and on certain
1229 // function attributes.
1230 AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
1231 // In case if we can handle this set of sttributes - set up function attrs
1232 // directly on statepoint and return attrs later for gc_result intrinsic.
1233 invoke->setAttributes(new_attrs.getFnAttributes());
1234 return_attributes = new_attrs.getRetAttributes();
1238 // Generate gc relocates in exceptional path
1239 BasicBlock *unwindBlock = toReplace->getUnwindDest();
1240 assert(!isa<PHINode>(unwindBlock->begin()) &&
1241 unwindBlock->getUniquePredecessor() &&
1242 "can't safely insert in this block!");
1244 Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
1245 Builder.SetInsertPoint(IP);
1246 Builder.SetCurrentDebugLocation(toReplace->getDebugLoc());
1248 // Extract second element from landingpad return value. We will attach
1249 // exceptional gc relocates to it.
1250 const unsigned idx = 1;
1251 Instruction *exceptional_token =
1252 cast<Instruction>(Builder.CreateExtractValue(
1253 unwindBlock->getLandingPadInst(), idx, "relocate_token"));
1254 result.UnwindToken = exceptional_token;
1256 // Just throw away return value. We will use the one we got for normal
1258 (void)CreateGCRelocates(liveVariables, live_start, basePtrs,
1259 exceptional_token, Builder);
1261 // Generate gc relocates and returns for normal block
1262 BasicBlock *normalDest = toReplace->getNormalDest();
1263 assert(!isa<PHINode>(normalDest->begin()) &&
1264 normalDest->getUniquePredecessor() &&
1265 "can't safely insert in this block!");
1267 IP = &*(normalDest->getFirstInsertionPt());
1268 Builder.SetInsertPoint(IP);
1270 // gc relocates will be generated later as if it were regular call
1275 // Take the name of the original value call if it had one.
1276 token->takeName(CS.getInstruction());
1278 // The GCResult is already inserted, we just need to find it
1280 Instruction *toReplace = CS.getInstruction();
1281 assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
1282 "only valid use before rewrite is gc.result");
1283 assert(!toReplace->hasOneUse() ||
1284 isGCResult(cast<Instruction>(*toReplace->user_begin())));
1287 // Update the gc.result of the original statepoint (if any) to use the newly
1288 // inserted statepoint. This is safe to do here since the token can't be
1289 // considered a live reference.
1290 CS.getInstruction()->replaceAllUsesWith(token);
1292 result.StatepointToken = token;
1294 // Second, create a gc.relocate for every live variable
1295 CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
1299 struct name_ordering {
1302 bool operator()(name_ordering const &a, name_ordering const &b) {
1303 return -1 == a.derived->getName().compare(b.derived->getName());
1307 static void stablize_order(SmallVectorImpl<Value *> &basevec,
1308 SmallVectorImpl<Value *> &livevec) {
1309 assert(basevec.size() == livevec.size());
1311 SmallVector<name_ordering, 64> temp;
1312 for (size_t i = 0; i < basevec.size(); i++) {
1314 v.base = basevec[i];
1315 v.derived = livevec[i];
1318 std::sort(temp.begin(), temp.end(), name_ordering());
1319 for (size_t i = 0; i < basevec.size(); i++) {
1320 basevec[i] = temp[i].base;
1321 livevec[i] = temp[i].derived;
1325 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1326 // which make the relocations happening at this safepoint explicit.
1328 // WARNING: Does not do any fixup to adjust users of the original live
1329 // values. That's the caller's responsibility.
1331 makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
1332 PartiallyConstructedSafepointRecord &result) {
1333 auto liveset = result.liveset;
1334 auto PointerToBase = result.PointerToBase;
1336 // Convert to vector for efficient cross referencing.
1337 SmallVector<Value *, 64> basevec, livevec;
1338 livevec.reserve(liveset.size());
1339 basevec.reserve(liveset.size());
1340 for (Value *L : liveset) {
1341 livevec.push_back(L);
1343 assert(PointerToBase.find(L) != PointerToBase.end());
1344 Value *base = PointerToBase[L];
1345 basevec.push_back(base);
1347 assert(livevec.size() == basevec.size());
1349 // To make the output IR slightly more stable (for use in diffs), ensure a
1350 // fixed order of the values in the safepoint (by sorting the value name).
1351 // The order is otherwise meaningless.
1352 stablize_order(basevec, livevec);
1354 // Do the actual rewriting and delete the old statepoint
1355 makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
1356 CS.getInstruction()->eraseFromParent();
1359 // Helper function for the relocationViaAlloca.
1360 // It receives iterator to the statepoint gc relocates and emits store to the
1362 // location (via allocaMap) for each one of them.
1363 // Add visited values into the visitedLiveValues set we will later use them
1364 // for sanity check.
1366 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1367 DenseMap<Value *, Value *> &AllocaMap,
1368 DenseSet<Value *> &VisitedLiveValues) {
1370 for (User *U : GCRelocs) {
1371 if (!isa<IntrinsicInst>(U))
1374 IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);
1376 // We only care about relocates
1377 if (RelocatedValue->getIntrinsicID() !=
1378 Intrinsic::experimental_gc_relocate) {
1382 GCRelocateOperands RelocateOperands(RelocatedValue);
1383 Value *OriginalValue =
1384 const_cast<Value *>(RelocateOperands.getDerivedPtr());
1385 assert(AllocaMap.count(OriginalValue));
1386 Value *Alloca = AllocaMap[OriginalValue];
1388 // Emit store into the related alloca
1389 // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to
1390 // the correct type according to alloca.
1391 assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
1392 IRBuilder<> Builder(RelocatedValue->getNextNode());
1393 Value *CastedRelocatedValue =
1394 Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
1395 RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
1397 StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
1398 Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
1401 VisitedLiveValues.insert(OriginalValue);
1406 // Helper function for the "relocationViaAlloca". Similar to the
1407 // "insertRelocationStores" but works for rematerialized values.
1409 insertRematerializationStores(
1410 RematerializedValueMapTy RematerializedValues,
1411 DenseMap<Value *, Value *> &AllocaMap,
1412 DenseSet<Value *> &VisitedLiveValues) {
1414 for (auto RematerializedValuePair: RematerializedValues) {
1415 Instruction *RematerializedValue = RematerializedValuePair.first;
1416 Value *OriginalValue = RematerializedValuePair.second;
1418 assert(AllocaMap.count(OriginalValue) &&
1419 "Can not find alloca for rematerialized value");
1420 Value *Alloca = AllocaMap[OriginalValue];
1422 StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
1423 Store->insertAfter(RematerializedValue);
1426 VisitedLiveValues.insert(OriginalValue);
/// Do all the relocation update via allocas and mem2reg.
///
/// For every live gc pointer (and every rematerialized value) an alloca is
/// created in the entry block; the original definition and every relocated
/// version are stored into it; all uses are rewritten to load from the
/// alloca; finally PromoteMemToReg folds the allocas back into SSA form.
///
/// NOTE(review): this listing appears to be missing several physical lines
/// (loop increments, early continues, closing braces) -- verify against the
/// upstream revision before relying on exact control flow.
static void relocationViaAlloca(
    Function &F, DominatorTree &DT, ArrayRef<Value *> live,
    ArrayRef<struct PartiallyConstructedSafepointRecord> records) {
  // record initial number of (static) allocas; we'll check we have the same
  // number when we get done.
  int InitialAllocaNum = 0;
  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
    if (isa<AllocaInst>(*I))

  // TODO-PERF: change data structures, reserve
  DenseMap<Value *, Value *> allocaMap;
  SmallVector<AllocaInst *, 200> PromotableAllocas;
  // Used later to check that we have enough allocas to store all values
  std::size_t NumRematerializedValues = 0;
  PromotableAllocas.reserve(live.size());

  // Emit alloca for "LiveValue" and record it in "allocaMap" and
  // "PromotableAllocas"
  auto emitAllocaFor = [&](Value *LiveValue) {
    AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
                                        F.getEntryBlock().getFirstNonPHI());
    allocaMap[LiveValue] = Alloca;
    PromotableAllocas.push_back(Alloca);

  // emit alloca for each live gc pointer
  for (unsigned i = 0; i < live.size(); i++) {
    emitAllocaFor(live[i]);

  // emit allocas for rematerialized values
  for (size_t i = 0; i < records.size(); i++) {
    const struct PartiallyConstructedSafepointRecord &Info = records[i];

    for (auto RematerializedValuePair: Info.RematerializedValues) {
      Value *OriginalValue = RematerializedValuePair.second;
      // Several safepoints may rematerialize the same original value;
      // only one alloca is needed per original value.
      if (allocaMap.count(OriginalValue) != 0)

      emitAllocaFor(OriginalValue);
      ++NumRematerializedValues;

  // The next two loops are part of the same conceptual operation. We need to
  // insert a store to the alloca after the original def and at each
  // redefinition. We need to insert a load before each use. These are split
  // into distinct loops for performance reasons.

  // update gc pointer after each statepoint
  // either store a relocated value or null (if no relocated value found for
  // this gc pointer and it is not a gc_result)
  // this must happen before we update the statepoint with load of alloca
  // otherwise we lose the link between statepoint and old def
  for (size_t i = 0; i < records.size(); i++) {
    const struct PartiallyConstructedSafepointRecord &info = records[i];
    Value *Statepoint = info.StatepointToken;

    // This will be used for consistency check
    DenseSet<Value *> visitedLiveValues;

    // Insert stores for normal statepoint gc relocates
    insertRelocationStores(Statepoint->users(), allocaMap, visitedLiveValues);

    // In case if it was invoke statepoint
    // we will insert stores for exceptional path gc relocates.
    if (isa<InvokeInst>(Statepoint)) {
      insertRelocationStores(info.UnwindToken->users(), allocaMap,

    // Do similar thing with rematerialized values
    insertRematerializationStores(info.RematerializedValues, allocaMap,

    if (ClobberNonLive) {
      // As a debugging aid, pretend that an unrelocated pointer becomes null
      // at the gc.statepoint. This will turn some subtle GC problems into
      // slightly easier to debug SEGVs. Note that on large IR files with
      // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
      SmallVector<AllocaInst *, 64> ToClobber;
      for (auto Pair : allocaMap) {
        Value *Def = Pair.first;
        AllocaInst *Alloca = cast<AllocaInst>(Pair.second);

        // This value was relocated
        if (visitedLiveValues.count(Def)) {
        ToClobber.push_back(Alloca);

      // Store a null pointer into each alloca we decided to clobber.
      auto InsertClobbersAt = [&](Instruction *IP) {
        for (auto *AI : ToClobber) {
          auto AIType = cast<PointerType>(AI->getType());
          auto PT = cast<PointerType>(AIType->getElementType());
          Constant *CPN = ConstantPointerNull::get(PT);
          StoreInst *store = new StoreInst(CPN, AI);
          store->insertBefore(IP);

      // Insert the clobbering stores. These may get intermixed with the
      // gc.results and gc.relocates, but that's fine.
      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
        InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
        InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
        BasicBlock::iterator Next(cast<CallInst>(Statepoint));
        InsertClobbersAt(Next);

  // update use with load allocas and add store for gc_relocated
  for (auto Pair : allocaMap) {
    Value *def = Pair.first;
    Value *alloca = Pair.second;

    // we pre-record the uses of allocas so that we dont have to worry about
    // later updates
    // that change the user information.
    SmallVector<Instruction *, 20> uses;
    // PERF: trade a linear scan for repeated reallocation
    uses.reserve(std::distance(def->user_begin(), def->user_end()));
    for (User *U : def->users()) {
      if (!isa<ConstantExpr>(U)) {
        // If the def has a ConstantExpr use, then the def is either a
        // ConstantExpr use itself or null. In either case
        // (recursively in the first, directly in the second), the oop
        // it is ultimately dependent on is null and this particular
        // use does not need to be fixed up.
        uses.push_back(cast<Instruction>(U));

    // De-duplicate: a single instruction may use 'def' more than once.
    std::sort(uses.begin(), uses.end());
    auto last = std::unique(uses.begin(), uses.end());
    uses.erase(last, uses.end());

    for (Instruction *use : uses) {
      if (isa<PHINode>(use)) {
        PHINode *phi = cast<PHINode>(use);
        // For phis the load must sit in the incoming block, not before the
        // phi itself.
        for (unsigned i = 0; i < phi->getNumIncomingValues(); i++) {
          if (def == phi->getIncomingValue(i)) {
            LoadInst *load = new LoadInst(
                alloca, "", phi->getIncomingBlock(i)->getTerminator());
            phi->setIncomingValue(i, load);
        LoadInst *load = new LoadInst(alloca, "", use);
        use->replaceUsesOfWith(def, load);

    // emit store for the initial gc value
    // store must be inserted after load, otherwise store will be in alloca's
    // use list and an extra load will be inserted before it
    StoreInst *store = new StoreInst(def, alloca);
    if (Instruction *inst = dyn_cast<Instruction>(def)) {
      if (InvokeInst *invoke = dyn_cast<InvokeInst>(inst)) {
        // InvokeInst is a TerminatorInst so the store need to be inserted
        // into its normal destination block.
        BasicBlock *normalDest = invoke->getNormalDest();
        store->insertBefore(normalDest->getFirstNonPHI());
        assert(!inst->isTerminator() &&
               "The only TerminatorInst that can produce a value is "
               "InvokeInst which is handled above.");
        store->insertAfter(inst);
      assert(isa<Argument>(def));
      store->insertAfter(cast<Instruction>(alloca));

  assert(PromotableAllocas.size() == live.size() + NumRematerializedValues &&
         "we must have the same allocas with lives");
  if (!PromotableAllocas.empty()) {
    // apply mem2reg to promote alloca to SSA
    PromoteMemToReg(PromotableAllocas, DT);

  // Re-count entry-block allocas; mem2reg must have removed everything we
  // added, leaving the original count.
  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
    if (isa<AllocaInst>(*I))
  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
1631 /// Implement a unique function which doesn't require we sort the input
1632 /// vector. Doing so has the effect of changing the output of a couple of
1633 /// tests in ways which make them less useful in testing fused safepoints.
1634 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
1636 SmallVector<T, 128> TempVec;
1637 TempVec.reserve(Vec.size());
1638 for (auto Element : Vec)
1639 TempVec.push_back(Element);
1641 for (auto V : TempVec) {
1642 if (Seen.insert(V).second) {
1648 /// Insert holders so that each Value is obviously live through the entire
1649 /// lifetime of the call.
1650 static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
1651 SmallVectorImpl<CallInst *> &Holders) {
1653 // No values to hold live, might as well not insert the empty holder
1656 Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
1657 // Use a dummy vararg function to actually hold the values live
1658 Function *Func = cast<Function>(M->getOrInsertFunction(
1659 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
1661 // For call safepoints insert dummy calls right after safepoint
1662 BasicBlock::iterator Next(CS.getInstruction());
1664 Holders.push_back(CallInst::Create(Func, Values, "", Next));
1667 // For invoke safepooints insert dummy calls both in normal and
1668 // exceptional destination blocks
1669 auto *II = cast<InvokeInst>(CS.getInstruction());
1670 Holders.push_back(CallInst::Create(
1671 Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
1672 Holders.push_back(CallInst::Create(
1673 Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
1676 static void findLiveReferences(
1677 Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
1678 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1679 GCPtrLivenessData OriginalLivenessData;
1680 computeLiveInValues(DT, F, OriginalLivenessData);
1681 for (size_t i = 0; i < records.size(); i++) {
1682 struct PartiallyConstructedSafepointRecord &info = records[i];
1683 const CallSite &CS = toUpdate[i];
1684 analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
1688 /// Remove any vector of pointers from the liveset by scalarizing them over the
1689 /// statepoint instruction. Adds the scalarized pieces to the liveset. It
1690 /// would be preferable to include the vector in the statepoint itself, but
1691 /// the lowering code currently does not handle that. Extending it would be
1692 /// slightly non-trivial since it requires a format change. Given how rare
1693 /// such cases are (for the moment?) scalarizing is an acceptable compromise.
// NOTE(review): this listing appears to be missing several physical lines
// (early return, else-branches, closing braces) -- verify against the
// upstream revision before relying on exact control flow.
static void splitVectorValues(Instruction *StatepointInst,
                              StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
  SmallVector<Value *, 16> ToSplit;
  for (Value *V : LiveSet)
    if (isa<VectorType>(V->getType()))
      ToSplit.push_back(V);

  if (ToSplit.empty())

  // Strategy: one alloca per vector; scalarize before the statepoint,
  // rebuild the vector after it (in both successors for an invoke), then
  // let mem2reg restore SSA form.
  Function &F = *(StatepointInst->getParent()->getParent());

  DenseMap<Value *, AllocaInst *> AllocaMap;
  // First is normal return, second is exceptional return (invoke only)
  DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
  for (Value *V : ToSplit) {
    // Entry-block alloca so PromoteMemToReg can promote it later.
    AllocaInst *Alloca =
        new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
    AllocaMap[V] = Alloca;

    VectorType *VT = cast<VectorType>(V->getType());
    IRBuilder<> Builder(StatepointInst);
    SmallVector<Value *, 16> Elements;
    for (unsigned i = 0; i < VT->getNumElements(); i++)
      Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
    LiveSet.insert(Elements.begin(), Elements.end());

    // Rebuild the vector from the (relocated) scalars at the given point.
    auto InsertVectorReform = [&](Instruction *IP) {
      Builder.SetInsertPoint(IP);
      Builder.SetCurrentDebugLocation(IP->getDebugLoc());
      Value *ResultVec = UndefValue::get(VT);
      for (unsigned i = 0; i < VT->getNumElements(); i++)
        ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
                                                Builder.getInt32(i));

    if (isa<CallInst>(StatepointInst)) {
      BasicBlock::iterator Next(StatepointInst);
      Instruction *IP = &*(Next);
      Replacements[V].first = InsertVectorReform(IP);
      Replacements[V].second = nullptr;
      InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
      // We've already normalized - check that we don't have shared destination
      // blocks
      BasicBlock *NormalDest = Invoke->getNormalDest();
      assert(!isa<PHINode>(NormalDest->begin()));
      BasicBlock *UnwindDest = Invoke->getUnwindDest();
      assert(!isa<PHINode>(UnwindDest->begin()));
      // Insert insert element sequences in both successors
      Instruction *IP = &*(NormalDest->getFirstInsertionPt());
      Replacements[V].first = InsertVectorReform(IP);
      IP = &*(UnwindDest->getFirstInsertionPt());
      Replacements[V].second = InsertVectorReform(IP);

  for (Value *V : ToSplit) {
    AllocaInst *Alloca = AllocaMap[V];

    // Capture all users before we start mutating use lists
    SmallVector<Instruction *, 16> Users;
    for (User *U : V->users())
      Users.push_back(cast<Instruction>(U));

    for (Instruction *I : Users) {
      if (auto Phi = dyn_cast<PHINode>(I)) {
        // For phis the load must sit in the incoming block, not at the phi.
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
          if (V == Phi->getIncomingValue(i)) {
            LoadInst *Load = new LoadInst(
                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
        LoadInst *Load = new LoadInst(Alloca, "", I);
        I->replaceUsesOfWith(V, Load);

    // Store the original value and the replacement value into the alloca
    StoreInst *Store = new StoreInst(V, Alloca);
    if (auto I = dyn_cast<Instruction>(V))
      Store->insertAfter(I);
      Store->insertAfter(Alloca);

    // Normal return for invoke, or call return
    Instruction *Replacement = cast<Instruction>(Replacements[V].first);
    (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
    // Unwind return for invoke only
    Replacement = cast_or_null<Instruction>(Replacements[V].second);
      (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);

  // apply mem2reg to promote alloca to SSA
  SmallVector<AllocaInst *, 16> Allocas;
  for (Value *V : ToSplit)
    Allocas.push_back(AllocaMap[V]);
  PromoteMemToReg(Allocas, DT);
// Helper function for the "rematerializeLiveValues". It walks use chain
// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
// values are visited (currently it is GEP's and casts). Returns true if it
// successfully reached "BaseValue" and false otherwise.
// Fills "ChainToBase" array with all visited values. "BaseValue" is not
static bool findRematerializableChainToBasePointer(
    SmallVectorImpl<Instruction*> &ChainToBase,
    Value *CurrentValue, Value *BaseValue) {

  // We have found a base value
  if (CurrentValue == BaseValue) {

  // GEPs are cheap to recompute; record the GEP and keep walking towards the
  // base through its pointer operand.
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
    ChainToBase.push_back(GEP);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  GEP->getPointerOperand(),

  if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
    // stripPointerCasts() skips over the whole run of no-op casts at once.
    Value *Def = CI->stripPointerCasts();

    // These two checks are basically similar. First one is here for the
    // consistency with findBasePointers logic.
    assert(!isa<CastInst>(Def) && "not a pointer cast found");
    // Only no-op (representation-preserving) casts are safe to re-execute
    // after the statepoint.
    if (!CI->isNoopCast(CI->getModule()->getDataLayout()))

    ChainToBase.push_back(CI);
    return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);

  // Not supported instruction in the chain
// Helper function for the "rematerializeLiveValues". Compute cost of the use
// chain we are going to rematerialize. The chain is expected to contain only
// GEPs and no-op casts (see findRematerializableChainToBasePointer).
chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
                       TargetTransformInfo &TTI) {

  for (Instruction *Instr : Chain) {
    if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
      assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
             "non noop cast is found during rematerialization");

      // Ask TTI for the target-specific cost of re-issuing this cast.
      Type *SrcTy = CI->getOperand(0)->getType();
      Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);

    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
      // Cost of the address calculation
      Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
      Cost += TTI.getAddressComputationCost(ValTy);

      // And cost of the GEP itself
      // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
      // allowed for the external usage)
      if (!GEP->hasAllConstantIndices())

      llvm_unreachable("unsupported instruciton type during rematerialization");
// From the statepoint liveset pick values that are cheaper to recompute than to
// relocate. Remove these values from the liveset, rematerialize them after
// statepoint and record them in "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallSite CS,
                                    PartiallyConstructedSafepointRecord &Info,
                                    TargetTransformInfo &TTI) {
  // Chains longer than this are never considered for rematerialization.
  const int ChainLengthThreshold = 10;

  // Record values we are going to delete from this statepoint live set.
  // We cannot do this in following loop due to iterator invalidation.
  SmallVector<Value *, 32> LiveValuesToBeDeleted;

  for (Value *LiveValue: Info.liveset) {
    // For each live pointer find its defining chain
    SmallVector<Instruction *, 3> ChainToBase;
    assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
    findRematerializableChainToBasePointer(ChainToBase,
                                           Info.PointerToBase[LiveValue]);

    // Nothing to do, or chain is too long
        ChainToBase.size() == 0 ||
        ChainToBase.size() > ChainLengthThreshold)

    // Compute cost of this chain
    unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
    // TODO: We can also account for cases when we will be able to remove some
    // of the rematerialized values by later optimization passes. I.e if
    // we rematerialized several intersecting chains. Or if original values
    // don't have any uses besides this statepoint.

    // For invokes we need to rematerialize each chain twice - for normal and
    // for unwind basic blocks. Model this by multiplying cost by two.
    if (CS.isInvoke()) {

    // If it's too expensive - skip it
    if (Cost >= RematerializationThreshold)

    // Remove value from the live set
    LiveValuesToBeDeleted.push_back(LiveValue);

    // Clone instructions and record them inside "Info" structure

    // Walk backwards to visit top-most instructions first
    std::reverse(ChainToBase.begin(), ChainToBase.end());

    // Utility function which clones all instructions from "ChainToBase"
    // and inserts them before "InsertBefore". Returns rematerialized value
    // which should be used after statepoint.
    auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
      Instruction *LastClonedValue = nullptr;
      Instruction *LastValue = nullptr;
      for (Instruction *Instr: ChainToBase) {
        // Only GEP's and casts are supported as we need to be careful to not
        // introduce any new uses of pointers not in the liveset.
        // Note that it's fine to introduce new uses of pointers which were
        // otherwise not used after this statepoint.
        assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));

        Instruction *ClonedValue = Instr->clone();
        ClonedValue->insertBefore(InsertBefore);
        ClonedValue->setName(Instr->getName() + ".remat");

        // If it is not first instruction in the chain then it uses previously
        // cloned value. We should update it to use cloned value.
        if (LastClonedValue) {
          ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);

          // Assert that cloned instruction does not use any instructions
          // other than LastClonedValue
          for (auto OpValue: ClonedValue->operand_values()) {
            if (isa<Instruction>(OpValue))
              assert(OpValue == LastClonedValue &&
                     "unexpected use found in rematerialized value");

        LastClonedValue = ClonedValue;

      assert(LastClonedValue);
      return LastClonedValue;

    // Different cases for calls and invokes. For invokes we need to clone
    // instructions both on normal and unwind path.
      // Call case: a single clone right after the statepoint suffices.
      Instruction *InsertBefore = CS.getInstruction()->getNextNode();
      assert(InsertBefore);
      Instruction *RematerializedValue = rematerializeChain(InsertBefore);
      Info.RematerializedValues[RematerializedValue] = LiveValue;

      InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());

      Instruction *NormalInsertBefore =
          Invoke->getNormalDest()->getFirstInsertionPt();
      Instruction *UnwindInsertBefore =
          Invoke->getUnwindDest()->getFirstInsertionPt();

      Instruction *NormalRematerializedValue =
          rematerializeChain(NormalInsertBefore);
      Instruction *UnwindRematerializedValue =
          rematerializeChain(UnwindInsertBefore);

      Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;

  // Remove rematerialized values from the live set
  for (auto LiveValue: LiveValuesToBeDeleted) {
    Info.liveset.erase(LiveValue);
// Rewrite every statepoint in "toUpdate" so that potential GC relocations are
// explicit: compute liveness, find base pointers, optionally rematerialize
// cheap chains, emit the new statepoints and gc.relocates, and finally fix up
// uses via allocas. Returns true if any statepoint was rewritten.
static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
                              SmallVectorImpl<CallSite> &toUpdate) {
  // sanity check the input
  std::set<CallSite> uniqued;
  uniqued.insert(toUpdate.begin(), toUpdate.end());
  assert(uniqued.size() == toUpdate.size() && "no duplicates please!");

  for (size_t i = 0; i < toUpdate.size(); i++) {
    CallSite &CS = toUpdate[i];
    assert(CS.getInstruction()->getParent()->getParent() == &F);
    assert(isStatepoint(CS) && "expected to already be a deopt statepoint");

  // When inserting gc.relocates for invokes, we need to be able to insert at
  // the top of the successor blocks. See the comment on
  // normalForInvokeSafepoint on exactly what is needed. Note that this step
  // may restructure the CFG.
  for (CallSite CS : toUpdate) {
    InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
    normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
    normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),

  // A list of dummy calls added to the IR to keep various values obviously
  // live in the IR. We'll remove all of these when done.
  SmallVector<CallInst *, 64> holders;

  // Insert a dummy call with all of the arguments to the vm_state we'll need
  // for the actual safepoint insertion. This ensures reference arguments in
  // the deopt argument list are considered live through the safepoint (and
  // thus makes sure they get relocated.)
  for (size_t i = 0; i < toUpdate.size(); i++) {
    CallSite &CS = toUpdate[i];
    Statepoint StatepointCS(CS);

    SmallVector<Value *, 64> DeoptValues;
    for (Use &U : StatepointCS.vm_state_args()) {
      Value *Arg = cast<Value>(&U);
      assert(!isUnhandledGCPointerType(Arg->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(Arg->getType()))
        DeoptValues.push_back(Arg);

    insertUseHolderAfter(CS, DeoptValues, holders);

  // One partially-constructed record per statepoint, filled in incrementally
  // by the phases below.
  SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
  records.reserve(toUpdate.size());
  for (size_t i = 0; i < toUpdate.size(); i++) {
    struct PartiallyConstructedSafepointRecord info;
    records.push_back(info);
  assert(records.size() == toUpdate.size());

  // A) Identify all gc pointers which are statically live at the given call
  findLiveReferences(F, DT, P, toUpdate, records);

  // Do a limited scalarization of any live at safepoint vector values which
  // contain pointers. This enables this pass to run after vectorization at
  // the cost of some possible performance loss. TODO: it would be nice to
  // natively support vectors all the way through the backend so we don't need
  // to scalarize here.
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    Instruction *statepoint = toUpdate[i].getInstruction();
    splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);

  // B) Find the base pointers for each live pointer
  /* scope for caching */ {
    // Cache the 'defining value' relation used in the computation and
    // insertion of base phis and selects. This ensures that we don't insert
    // large numbers of duplicate base_phis.
    DefiningValueMapTy DVCache;

    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
      CallSite &CS = toUpdate[i];
      findBasePointers(DT, DVCache, CS, info);
  } // end of cache scope

  // The base phi insertion logic (for any safepoint) may have inserted new
  // instructions which are now live at some safepoint. The simplest such
  //   phi a <-- will be a new base_phi here
  //   safepoint 1 <-- that needs to be live here

  // We insert some dummy calls after each safepoint to definitely hold live
  // the base pointers which were identified for that safepoint. We'll then
  // ask liveness for _every_ base inserted to see what is now live. Then we
  // remove the dummy calls.
  holders.reserve(holders.size() + records.size());
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    CallSite &CS = toUpdate[i];

    SmallVector<Value *, 128> Bases;
    for (auto Pair : info.PointerToBase) {
      Bases.push_back(Pair.second);
    insertUseHolderAfter(CS, Bases, holders);

  // By selecting base pointers, we've effectively inserted new uses. Thus, we
  // need to rerun liveness. We may *also* have inserted new defs, but that's
  // not the key issue.
  recomputeLiveInValues(F, DT, P, toUpdate, records);

  if (PrintBasePointers) {
    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
      errs() << "Base Pairs: (w/Relocation)\n";
      for (auto Pair : info.PointerToBase) {
        errs() << " derived %" << Pair.first->getName() << " base %"
               << Pair.second->getName() << "\n";

  // The holder calls have served their purpose (keeping values live for the
  // liveness queries above); drop them before rewriting.
  for (size_t i = 0; i < holders.size(); i++) {
    holders[i]->eraseFromParent();
    holders[i] = nullptr;

  // In order to reduce live set of statepoint we might choose to rematerialize
  // some values instead of relocating them. This is purely an optimization and
  // does not influence correctness.
  TargetTransformInfo &TTI =
      P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    CallSite &CS = toUpdate[i];
    rematerializeLiveValues(CS, info, TTI);

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed. We do not yet update uses of the values being
  // relocated. We have references to live variables that need to
  // survive to the last iteration of this loop. (By construction, the
  // previous statepoint can not be a live variable, thus we can and do remove
  // the old statepoint calls as we go.)
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    CallSite &CS = toUpdate[i];
    makeStatepointExplicit(DT, CS, P, info);
  toUpdate.clear(); // prevent accidental use of invalid CallSites

  // Do all the fixups of the original live variables to their relocated selves
  SmallVector<Value *, 128> live;
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    // We can't simply save the live set from the original insertion. One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the liveset is embedded in the statepoint (and updated), so
    // we just grab that.
    Statepoint statepoint(info.StatepointToken);
    live.insert(live.end(), statepoint.gc_args_begin(),
                statepoint.gc_args_end());

    // Do some basic sanity checks on our liveness results before performing
    // relocation. Relocation can and will turn mistakes in liveness results
    // into non-sensical code which is much harder to debug.
    // TODO: It would be nice to test consistency as well
    assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
           "statepoint must be reachable or liveness is meaningless");
    for (Value *V : statepoint.gc_args()) {
      if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses
      auto LiveInst = cast<Instruction>(V);
      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
             "unreachable values should never be live");
      assert(DT.dominates(LiveInst, info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");

  unique_unsorted(live);

  for (auto ptr : live) {
    assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");

  relocationViaAlloca(F, DT, live, records);
  return !records.empty();
2199 /// Returns true if this function should be rewritten by this pass. The main
2200 /// point of this function is as an extension point for custom logic.
2201 static bool shouldRewriteStatepointsIn(Function &F) {
2202 // TODO: This should check the GCStrategy
2204 const std::string StatepointExampleName("statepoint-example");
2205 return StatepointExampleName == F.getGC();
// Pass entry point: collect the reachable gc.statepoints in F and rewrite
// them so GC relocations are explicit. Returns true if the IR was changed.
bool RewriteStatepointsForGC::runOnFunction(Function &F) {
  // Nothing to do for declarations.
  if (F.isDeclaration() || F.empty())

  // Policy choice says not to rewrite - the most common reason is that we're
  // compiling code without a GCStrategy.
  if (!shouldRewriteStatepointsIn(F))

  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  // Gather all the statepoints which need rewritten. Be careful to only
  // consider those in reachable code since we need to ask dominance queries
  // when rewriting. We'll delete the unreachable ones in a moment.
  SmallVector<CallSite, 64> ParsePointNeeded;
  bool HasUnreachableStatepoint = false;
  for (Instruction &I : inst_range(F)) {
    // TODO: only the ones with the flag set!
    if (isStatepoint(I)) {
      if (DT.isReachableFromEntry(I.getParent()))
        ParsePointNeeded.push_back(CallSite(&I));
        HasUnreachableStatepoint = true;

  bool MadeChange = false;

  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass. This makes testing easier and the
  // resulting IR less confusing to human readers. Rather than be fancy, we
  // just reuse a utility function which removes the unreachable blocks.
  if (HasUnreachableStatepoint)
    MadeChange |= removeUnreachableBlocks(F);

  // Return early if no work to do.
  if (ParsePointNeeded.empty())

  // As a prepass, go ahead and aggressively destroy single entry phi nodes.
  // These are created by LCSSA. They have the effect of increasing the size
  // of liveness sets for no good reason. It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor()) {
      FoldSingleEntryPHINodes(&BB);

  MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
2264 // liveness computation via standard dataflow
2265 // -------------------------------------------------------------------
2267 // TODO: Consider using bitvectors for liveness, the set of potentially
2268 // interesting values should be small and easy to pre-compute.
/// Compute the live-in set for the location rbegin starting from
/// the live-out set of the basic block
static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
                                BasicBlock::reverse_iterator rend,
                                DenseSet<Value *> &LiveTmp) {

  // Walk the instructions in reverse, applying the standard kill-then-gen
  // dataflow update to LiveTmp.
  for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
    Instruction *I = &*ritr;

    // KILL/Def - Remove this definition from LiveIn

    // Don't consider *uses* in PHI nodes, we handle their contribution to
    // predecessor blocks when we seed the LiveOut sets
    if (isa<PHINode>(I))

    // USE - Add to the LiveIn set for this instruction
    for (Value *V : I->operands()) {
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
        // The choice to exclude all things constant here is slightly subtle.
        // There are two independent reasons:
        // - We assume that things which are constant (from LLVM's definition)
        //   do not move at runtime. For example, the address of a global
        //   variable is fixed, even though its contents may not be.
        // - Second, we can't disallow arbitrary inttoptr constants even
        //   if the language frontend does. Optimization passes are free to
        //   locally exploit facts without respect to global reachability. This
        //   can create sections of code which are dynamically unreachable and
        //   contain just about anything. (see constants.ll in tests)
/// Seed BB's live-out set with the values its successors' PHI nodes take
/// from BB; these are uses on the edge out of BB and are therefore live-out
/// of BB even though they are not uses *inside* any successor.
static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {

  for (BasicBlock *Succ : successors(BB)) {
    // PHIs are guaranteed to come first in the block, so stop at the first
    // non-PHI instruction.
    const BasicBlock::iterator E(Succ->getFirstNonPHI());
    for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
      PHINode *Phi = cast<PHINode>(&*I);
      Value *V = Phi->getIncomingValueForBlock(BB);
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
/// Return the set of values defined (and thus killed, in dataflow terms) in
/// BB whose types are handled GC pointer types.
static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
  DenseSet<Value *> KillSet;
  for (Instruction &I : *BB)
    if (isHandledGCPointerType(I.getType()))
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// sanity check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
                          TerminatorInst *TI, bool TermOkay = false) {
  for (Value *V : Live) {
    // Non-instruction values (arguments, constants) are not subject to the
    // dominance check.
    if (auto *I = dyn_cast<Instruction>(V)) {
      // The terminator can be a member of the LiveOut set. LLVM's definition
      // of instruction dominance states that V does not dominate itself. As
      // such, we need to special case this to allow it.
      if (TermOkay && TI == I)
      assert(DT.dominates(I, TI) &&
             "basic SSA liveness expectation violated by liveness analysis");
/// Check that all the liveness sets used during the computation of liveness
/// obey basic SSA properties. This is useful for finding cases where we miss
static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
  // LiveOut may legitimately contain the terminator itself, hence TermOkay.
  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
/// Compute per-block LiveIn/LiveOut/LiveSet/KillSet for all handled GC
/// pointers in F via a standard backwards dataflow fixed-point iteration.
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data) {

  SmallSetVector<BasicBlock *, 200> Worklist;
  auto AddPredsToWorklist = [&](BasicBlock *BB) {
    // We use a SetVector so that we don't have duplicates in the worklist.
    Worklist.insert(pred_begin(BB), pred_end(BB));
  auto NextItem = [&]() {
    BasicBlock *BB = Worklist.back();
    Worklist.pop_back();

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);

    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");

    // LiveOut starts as just the values flowing into successor PHIs; it grows
    // during the fixed-point iteration below.
    Data.LiveOut[&BB] = DenseSet<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
    set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
    if (!Data.LiveIn[&BB].empty())
      AddPredsToWorklist(&BB);

  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = NextItem();

    // Compute our new liveout set, then exit early if it hasn't changed
    // despite the contribution of our successor.
    DenseSet<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      set_union(LiveOut, Data.LiveIn[Succ]);
    // assert OutLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors LiveIn Thus, the LiveIn of this block
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    DenseSet<Value *> LiveTmp = LiveOut;
    set_union(LiveTmp, Data.LiveSet[BB]);
    set_subtract(LiveTmp, Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      Data.LiveIn[BB] = LiveTmp;
      AddPredsToWorklist(BB);
  } // while( !worklist.empty() )

  // Sanity check our output against SSA properties. This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F) {
    checkBasicSSA(DT, Data, BB);
/// Given precomputed per-block liveness Data, compute the set of values live
/// immediately before Inst and insert them into Out.
static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out) {

  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required
  assert(Data.LiveOut.count(BB));
  DenseSet<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly. Its
  // call result is not live (normal), nor are its arguments
  // (unless they're used again later). This adjustment is
  // specifically what we need to relocate
  BasicBlock::reverse_iterator rend(Inst);
  computeLiveInValues(BB->rbegin(), rend, LiveOut);
  LiveOut.erase(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
2458 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
2460 PartiallyConstructedSafepointRecord &Info) {
2461 Instruction *Inst = CS.getInstruction();
2462 StatepointLiveSetTy Updated;
2463 findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
2466 DenseSet<Value *> Bases;
2467 for (auto KVPair : Info.PointerToBase) {
2468 Bases.insert(KVPair.second);
2471 // We may have base pointers which are now live that weren't before. We need
2472 // to update the PointerToBase structure to reflect this.
2473 for (auto V : Updated)
2474 if (!Info.PointerToBase.count(V)) {
2475 assert(Bases.count(V) && "can't find base for unexpected live value");
2476 Info.PointerToBase[V] = V;
2481 for (auto V : Updated) {
2482 assert(Info.PointerToBase.count(V) &&
2483 "must be able to find base for live value");
2487 // Remove any stale base mappings - this can happen since our liveness is
2488 // more precise then the one inherent in the base pointer analysis
2489 DenseSet<Value *> ToErase;
2490 for (auto KVPair : Info.PointerToBase)
2491 if (!Updated.count(KVPair.first))
2492 ToErase.insert(KVPair.first);
2493 for (auto V : ToErase)
2494 Info.PointerToBase.erase(V);
2497 for (auto KVPair : Info.PointerToBase)
2498 assert(Updated.count(KVPair.first) && "record for non-live value");
2501 Info.liveset = Updated;