deoptimization, in which case control will not be returned to the
compiled frame.
+The inliner knows how to inline through calls that have deoptimization
+operand bundles. Just as inlining through a normal call site involves
+composing the normal and exceptional continuations, inlining through a
+call site with a deoptimization operand bundle must appropriately
+compose the "safe" deoptimization continuation. The inliner does this
+by prepending the caller's deoptimization continuation to every
+deoptimization continuation in the inlined body. For example, inlining
+``@f`` into ``@g`` in the following example
+
+.. code-block:: llvm
+
+ define void @f() {
+ call void @x() ;; no deopt state
+ call void @y() [ "deopt"(i32 10) ]
+ call void @y() [ "deopt"(i32 10), "unknown"(i8* null) ]
+ ret void
+ }
+
+ define void @g() {
+ call void @f() [ "deopt"(i32 20) ]
+ ret void
+ }
+
+will result in
+
+.. code-block:: llvm
+
+ define void @g() {
+ call void @x() ;; still no deopt state
+ call void @y() [ "deopt"(i32 20, i32 10) ]
+ call void @y() [ "deopt"(i32 20, i32 10), "unknown"(i8* null) ]
+ ret void
+ }
+
+It is the frontend's responsibility to structure or encode the
+deoptimization state in such a way that syntactically prepending the
+caller's deoptimization state to the callee's deoptimization state is
+semantically equivalent to composing the caller's deoptimization
+continuation after the callee's deoptimization continuation.
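+
+For example (purely illustrative, and not mandated by LLVM), a frontend
+could encode each abstract frame as a bytecode index followed by a count
+of live values and then the live values themselves. Under that scheme
+the prepended state decodes naturally as a stack of abstract frames,
+outermost first; the hypothetical runtime-side decoder below (neither
+``AbstractFrame`` nor ``decodeDeoptState`` is part of LLVM) sketches
+this:
+
+.. code-block:: c++
+
+  #include <cstddef>
+  #include <cstdint>
+  #include <utility>
+  #include <vector>
+
+  struct AbstractFrame {
+    int64_t BytecodeIndex;
+    std::vector<int64_t> LiveValues;
+  };
+
+  // Walk the flattened deoptimization state; each iteration peels off
+  // one abstract frame, starting with the outermost (caller's) frame.
+  std::vector<AbstractFrame>
+  decodeDeoptState(const std::vector<int64_t> &State) {
+    std::vector<AbstractFrame> Frames;
+    for (std::size_t I = 0; I < State.size();) {
+      AbstractFrame Frame;
+      Frame.BytecodeIndex = State[I++];
+      int64_t NumLive = State[I++];
+      for (int64_t J = 0; J < NumLive; ++J)
+        Frame.LiveValues.push_back(State[I++]);
+      Frames.push_back(std::move(Frame));
+    }
+    return Frames;
+  }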
+
.. _moduleasm:
Module-Level Inline Assembly
OperandBundleDefT() {}
explicit OperandBundleDefT(StringRef Tag, const std::vector<InputTy> &Inputs)
: Tag(Tag), Inputs(Inputs) {}
+
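+  /// \brief Construct an operand bundle definition with the given tag,
+  /// taking ownership of the input vector.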
+  explicit OperandBundleDefT(StringRef Tag, std::vector<InputTy> &&Inputs)
+      : Tag(Tag), Inputs(std::move(Inputs)) {}
+
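+  /// \brief Construct an operand bundle definition that copies the tag and
+  /// inputs of the operand bundle use \p OBU.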
+ explicit OperandBundleDefT(const OperandBundleUse &OBU) {
+ Tag = OBU.getTagName();
+ Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end());
+ }
};
typedef OperandBundleDefT<Value *> OperandBundleDef;
BasicBlock *InsertAtEnd) {
return new(1) CallInst(F, NameStr, InsertAtEnd);
}
+
+ /// \brief Create a clone of \p CI with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+  /// The returned call instruction is identical to \p CI in every way except
+  /// that the operand bundles for the new instruction are set to the operand
+  /// bundles in \p Bundles.
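+  ///
+  /// A sketch of the intended usage pattern (names are illustrative;
+  /// \c NewState is assumed to be a \c std::vector<Value*> holding the new
+  /// deoptimization state):
+  /// \code
+  ///   SmallVector<OperandBundleDef, 1> Bundles;
+  ///   Bundles.emplace_back("deopt", std::move(NewState));
+  ///   CallInst *NewCI = CallInst::Create(CI, Bundles, CI);
+  ///   CI->replaceAllUsesWith(NewCI);
+  ///   CI->eraseFromParent();
+  /// \endcode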
+ static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
+ Instruction *InsertPt = nullptr);
+
/// CreateMalloc - Generate the IR for a call to malloc:
/// 1. Compute the malloc call's argument as the specified type's size,
/// possibly multiplied by the array size if the array size is not
InsertAtEnd);
}
+ /// \brief Create a clone of \p II with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+ /// The returned invoke instruction is identical to \p II in every way except
+ /// that the operand bundles for the new instruction are set to the operand
+ /// bundles in \p Bundles.
+ static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles,
+ Instruction *InsertPt = nullptr);
+
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
/// size.
bool ContainsDynamicAllocas;
+ /// All cloned call sites that have operand bundles attached are appended to
+ /// this vector.
+ std::vector<AssertingVH<Instruction>> OperandBundleCallSites;
+
ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {}
};
SubclassOptionalData = CI.SubclassOptionalData;
}
+CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
+ Instruction *InsertPt) {
+ CallSite CS(CI);
+ std::vector<Value *> Args(CS.arg_begin(), CS.arg_end());
+
+ auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(),
+ InsertPt);
+ NewCI->setTailCallKind(CI->getTailCallKind());
+ NewCI->setCallingConv(CI->getCallingConv());
+  NewCI->SubclassOptionalData = CI->SubclassOptionalData;
+  NewCI->setAttributes(CI->getAttributes());
+  NewCI->setDebugLoc(CI->getDebugLoc());
+ return NewCI;
+}
+
void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
AttributeSet PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, attr);
SubclassOptionalData = II.SubclassOptionalData;
}
+InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB,
+ Instruction *InsertPt) {
+ CallSite CS(II);
+ std::vector<Value *> Args(CS.arg_begin(), CS.arg_end());
+
+ auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(),
+ II->getUnwindDest(), Args, OpB,
+ II->getName(), InsertPt);
+ NewII->setCallingConv(II->getCallingConv());
+  NewII->SubclassOptionalData = II->SubclassOptionalData;
+  NewII->setAttributes(II->getAttributes());
+  NewII->setDebugLoc(II->getDebugLoc());
+ return NewII;
+}
+
BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
return getSuccessor(idx);
}
VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+
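+    // Remember the cloned call site if it carries operand bundles, so that
+    // interested callers (e.g. the inliner) can post-process it.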
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(&*II))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
NewInst->setName(OldTI->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
VMap[OldTI] = NewInst; // Add instruction map to value.
-
+
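+  // Likewise remember the cloned terminator (e.g. an invoke) if it carries
+  // operand bundles.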
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(OldTI))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
// Recursively clone any reachable successor blocks.
const TerminatorInst *TI = BB->getTerminator();
for (const BasicBlock *Succ : TI->successors())
// Create the new invoke instruction.
ImmutableCallSite CS(CI);
SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
- InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
- InvokeArgs, CI->getName(), BB);
+ SmallVector<OperandBundleDef, 1> OpBundles;
+
+  // Copy the OperandBundleUse instances to OperandBundleDefs. These two are
+ // *different* representations of operand bundles: see the documentation in
+ // InstrTypes.h for more details.
+ for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+ OpBundles.emplace_back(CS.getOperandBundleAt(i));
+
+  // Note: we're round-tripping operand bundles through memory here, and that
+  // could be avoided with a cleverer API design that we do not have as of
+  // this time.
+
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs,
+ OpBundles, CI->getName(), BB);
II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
- // The inliner does not know how to inline through calls with operand bundles.
- if (CS.hasOperandBundles())
- return false;
+ // The inliner does not know how to inline through calls with operand bundles
+ // in general ...
+ if (CS.hasOperandBundles()) {
+ // ... but it knows how to inline through "deopt" operand bundles.
+ bool CanInline =
+ CS.getNumOperandBundles() == 1 &&
+ CS.getOperandBundleAt(0).getTagID() == LLVMContext::OB_deopt;
+ if (!CanInline)
+ return false;
+ }
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
&*FirstNewBlock, IFI);
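+
+  // After the callee's body has been cloned into the caller, walk all cloned
+  // call sites that carry operand bundles and prepend this call site's
+  // "deopt" state to each inlined "deopt" bundle.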
+ if (CS.hasOperandBundles()) {
+ auto ParentDeopt = CS.getOperandBundleAt(0);
+ assert(ParentDeopt.getTagID() == LLVMContext::OB_deopt &&
+ "Checked on entry!");
+
+ SmallVector<OperandBundleDef, 2> OpDefs;
+
+ for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+ Instruction *I = VH;
+
+ OpDefs.clear();
+
+ CallSite ICS(I);
+ OpDefs.reserve(ICS.getNumOperandBundles());
+
+ for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
+ auto ChildOB = ICS.getOperandBundleAt(i);
+ if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+          // If the inlined call has other operand bundles, let them be.
+ OpDefs.emplace_back(ChildOB);
+ continue;
+ }
+
+        // Prepend the parent's deoptimization continuation to the newly
+        // inlined call's deoptimization continuation.
+        //
+        // Note: it may be useful to hoist this (operand bundle handling)
+        // logic out into a separate "policy" component if it gets crowded.
+ std::vector<Value *> MergedDeoptArgs;
+ MergedDeoptArgs.reserve(ParentDeopt.Inputs.size() +
+ ChildOB.Inputs.size());
+
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(),
+ ParentDeopt.Inputs.begin(),
+ ParentDeopt.Inputs.end());
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
+ ChildOB.Inputs.end());
+
+ OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
+ }
+
+ Instruction *NewI = nullptr;
+ if (isa<CallInst>(I))
+ NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+ else
+ NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
+
+ // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+ // this even if the call returns void.
+ I->replaceAllUsesWith(NewI);
+
+ VH = nullptr;
+ I->eraseFromParent();
+ }
+ }
+
// Update the callgraph if requested.
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
--- /dev/null
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare void @f()
+declare i32 @g()
+
+define i32 @callee_0() alwaysinline {
+ entry:
+ call void @f()
+ ret i32 2
+}
+
+define i32 @caller_0() {
+; CHECK-LABEL: @caller_0(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 2
+ %x = call i32 @callee_0() [ "deopt"(i32 5) ]
+ ret i32 %x
+}
+
+define i32 @callee_1() alwaysinline {
+ entry:
+ call void @f() [ "deopt"() ]
+ call void @f() [ "deopt"(i32 0, i32 1) ]
+ call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 2
+}
+
+define i32 @caller_1() {
+; CHECK-LABEL: @caller_1(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5) ]
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1) ]
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT: ret i32 2
+
+ %x = call i32 @callee_1() [ "deopt"(i32 5) ]
+ ret i32 %x
+}
+
+define i32 @callee_2() alwaysinline {
+ entry:
+ %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_2(i32 %val) {
+; CHECK-LABEL: @caller_2(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT: ret i32 [[RVAL]]
+ %x = call i32 @callee_2() [ "deopt"(i32 %val) ]
+ ret i32 %x
+}
+
+define i32 @callee_3() alwaysinline {
+ entry:
+ %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_3() personality i8 3 {
+; CHECK-LABEL: @caller_3(
+ entry:
+ %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+
+ normal:
+ ret i32 %x
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 101
+}
+
+define i32 @callee_4() alwaysinline personality i8 3 {
+ entry:
+ %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+ ret i32 %v
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 100
+}
+
+define i32 @caller_4() {
+; CHECK-LABEL: @caller_4(
+ entry:
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+ %x = call i32 @callee_4() [ "deopt"(i32 7) ]
+ ret i32 %x
+}