Teach the inliner to track deoptimization state

author Sanjoy Das <sanjoy@playingwithpointers.com>

Wed, 18 Nov 2015 06:23:38 +0000 (06:23 +0000)

committer Sanjoy Das <sanjoy@playingwithpointers.com>

Wed, 18 Nov 2015 06:23:38 +0000 (06:23 +0000)
author Sanjoy Das <sanjoy@playingwithpointers.com>
Wed, 18 Nov 2015 06:23:38 +0000 (06:23 +0000)
committer Sanjoy Das <sanjoy@playingwithpointers.com>
Wed, 18 Nov 2015 06:23:38 +0000 (06:23 +0000)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst

index b1a92dc0ebde442a14d3104467faaecace415e35..cafc208ba8d046e77b454b246fa74ab5f3955cb3 100644 (file)
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1509,6 +1509,46 @@ operand bundles do not capture their operands except during
  deoptimization, in which case control will not be returned to the
  compiled frame.
  
+The inliner knows how to inline through calls that have deoptimization
+operand bundles.  Just like inlining through a normal call site
+involves composing the normal and exceptional continuations, inlining
+through a call site with a deoptimization operand bundle needs to
+appropriately compose the "safe" deoptimization continuation.  The
+inliner does this by prepending the parent's deoptimization
+continuation to every deoptimization continuation in the inlined body.
+E.g. inlining ``@f`` into ``@g`` in the following example
+
+.. code-block:: llvm
+
+    define void @f() {
+      call void @x()  ;; no deopt state
+      call void @y() [ "deopt"(i32 10) ]
+      call void @y() [ "deopt"(i32 10), "unknown"(i8* null) ]
+      ret void
+    }
+
+    define void @g() {
+      call void @f() [ "deopt"(i32 20) ]
+      ret void
+    }
+
+will result in
+
+.. code-block:: llvm
+
+    define void @g() {
+      call void @x()  ;; still no deopt state
+      call void @y() [ "deopt"(i32 20, i32 10) ]
+      call void @y() [ "deopt"(i32 20, i32 10), "unknown"(i8* null) ]
+      ret void
+    }
+
+It is the frontend's responsibility to structure or encode the
+deoptimization state in a way that syntactically prepending the
+caller's deoptimization state to the callee's deoptimization state is
+semantically equivalent to composing the caller's deoptimization
+continuation after the callee's deoptimization continuation.
+
  .. _moduleasm:
  
  Module-Level Inline Assembly
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h

index 815fb73be06514bb1b96a925478cc4874548c91a..f0035ca3d4270e24e524beb8c3bb642af1d6b0e0 100644 (file)
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -1162,6 +1162,14 @@ template <typename InputTy> struct OperandBundleDefT {
    OperandBundleDefT() {}
    explicit OperandBundleDefT(StringRef Tag, const std::vector<InputTy> &Inputs)
        : Tag(Tag), Inputs(Inputs) {}
+
+  /// Construct a bundle definition tagged \p Tag that takes over the contents
+  /// of \p Inputs.
+  ///
+  /// \p Inputs is a named rvalue reference and therefore an lvalue inside the
+  /// constructor; it has to be std::move'd explicitly, otherwise the member is
+  /// copy-constructed and the caller's std::move buys nothing.
+  explicit OperandBundleDefT(StringRef Tag, std::vector<InputTy> &&Inputs)
+      : Tag(Tag), Inputs(std::move(Inputs)) {}
+
+  /// Build a bundle definition from the bundle use \p OBU: the tag comes from
+  /// OBU.getTagName() and every element of OBU.Inputs is copied, in order,
+  /// into Inputs.
+  explicit OperandBundleDefT(const OperandBundleUse &OBU)
+      : Tag(OBU.getTagName()),
+        Inputs(OBU.Inputs.begin(), OBU.Inputs.end()) {}
  };
  
  typedef OperandBundleDefT<Value *> OperandBundleDef;
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h

index 263d0918d8415c7f0a4b4b76f36c6cb6b0903d29..a5a48cb30b05a087090181d44ccec04ba9461d2e 100644 (file)
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -1459,6 +1459,16 @@ public:
                            BasicBlock *InsertAtEnd) {
      return new(1) CallInst(F, NameStr, InsertAtEnd);
    }
+
+  /// \brief Create a clone of \p CI with a different set of operand bundles and
+  /// insert it before \p InsertPt.
+  ///
+  /// The returned call instruction is identical to \p CI in every way except
+  /// that the operand bundles for the new instruction are set to the operand
+  /// bundles in \p Bundles.
+  static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
+                          Instruction *InsertPt = nullptr);
+
    /// CreateMalloc - Generate the IR for a call to malloc:
    /// 1. Compute the malloc call's argument as the specified type's size,
    ///    possibly multiplied by the array size if the array size is not
@@ -3403,6 +3413,15 @@ public:
                     InsertAtEnd);
    }
  
+  /// \brief Create a clone of \p II with a different set of operand bundles and
+  /// insert it before \p InsertPt.
+  ///
+  /// The returned invoke instruction is identical to \p II in every way except
+  /// that the operand bundles for the new instruction are set to the operand
+  /// bundles in \p Bundles.
+  static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles,
+                            Instruction *InsertPt = nullptr);
+
    /// Provide fast operand accessors
    DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
  
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h

index e4748c1afccc89cf50a61d45cf908b7782381248..2fccbb8029648afa175dddc6ad0f9e2974e0d445 100644 (file)
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -74,6 +74,10 @@ struct ClonedCodeInfo {
    /// size.
    bool ContainsDynamicAllocas;
  
+  /// All cloned call sites that have operand bundles attached are appended to
+  /// this vector.
+  std::vector<AssertingVH<Instruction>> OperandBundleCallSites;
+
    ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {}
  };
  
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp

index 9b6dfc2bf6aeac5e849219f38f661817bd57fc0f..b8c72dd7e39d93ba43c47222cc354bcb5eb85753 100644 (file)
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -297,6 +297,19 @@ CallInst::CallInst(const CallInst &CI)
    SubclassOptionalData = CI.SubclassOptionalData;
  }
  
+CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
+                           Instruction *InsertPt) {
+  CallSite CS(CI);
+  std::vector<Value *> Args(CS.arg_begin(), CS.arg_end());
+
+  auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(),
+                                 InsertPt);
+  NewCI->setTailCallKind(CI->getTailCallKind());
+  NewCI->setCallingConv(CI->getCallingConv());
+  NewCI->SubclassOptionalData = CI->SubclassOptionalData;
+  return NewCI;
+}
+
  void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
    AttributeSet PAL = getAttributes();
    PAL = PAL.addAttribute(getContext(), i, attr);
@@ -571,6 +584,19 @@ InvokeInst::InvokeInst(const InvokeInst &II)
    SubclassOptionalData = II.SubclassOptionalData;
  }
  
+InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB,
+                               Instruction *InsertPt) {
+  CallSite CS(II);
+  std::vector<Value *> Args(CS.arg_begin(), CS.arg_end());
+
+  auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(),
+                                   II->getUnwindDest(), Args, OpB,
+                                   II->getName(), InsertPt);
+  NewII->setCallingConv(II->getCallingConv());
+  NewII->SubclassOptionalData = II->SubclassOptionalData;
+  return NewII;
+}
+
  BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
    return getSuccessor(idx);
  }
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp

index 72b2c37542aff6de54a278207aed8ea8d70c8912..465fc34f6f1da09205d9317959a326261a5474d1 100644 (file)
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -380,6 +380,12 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
      VMap[&*II] = NewInst; // Add instruction map to value.
      NewBB->getInstList().push_back(NewInst);
      hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+
+    if (CodeInfo)
+      if (auto CS = ImmutableCallSite(&*II))
+        if (CS.hasOperandBundles())
+          CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
      if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
        if (isa<ConstantInt>(AI->getArraySize()))
          hasStaticAllocas = true;
@@ -451,7 +457,12 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
        NewInst->setName(OldTI->getName()+NameSuffix);
      NewBB->getInstList().push_back(NewInst);
      VMap[OldTI] = NewInst;             // Add instruction map to value.
-    
+
+    if (CodeInfo)
+      if (auto CS = ImmutableCallSite(OldTI))
+        if (CS.hasOperandBundles())
+          CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
      // Recursively clone any reachable successor blocks.
      const TerminatorInst *TI = BB->getTerminator();
      for (const BasicBlock *Succ : TI->successors())
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp

index 1507d313aa073cf7559fc6eac42d2e6c96ad8c66..dfb028111e9d17443ad321e73749e4febd9acd79 100644 (file)
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -208,8 +208,21 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
      // Create the new invoke instruction.
      ImmutableCallSite CS(CI);
      SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
-    InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
-                                        InvokeArgs, CI->getName(), BB);
+    SmallVector<OperandBundleDef, 1> OpBundles;
+
+    // Copy the OperandBundleUse instances to OperandBundleDefs.  These two are
+    // *different* representations of operand bundles: see the documentation in
+    // InstrTypes.h for more details.
+    for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+      OpBundles.emplace_back(CS.getOperandBundleAt(i));
+
+    // Note: we're round tripping operand bundles through memory here, and that
+    // can potentially be avoided with a cleverer API design that we do not have
+    // as of this time.
+
+    InvokeInst *II =
+        InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs,
+                           OpBundles, CI->getName(), BB);
      II->setDebugLoc(CI->getDebugLoc());
      II->setCallingConv(CI->getCallingConv());
      II->setAttributes(CI->getAttributes());
@@ -1029,9 +1042,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
        CalledFunc->isDeclaration() || // call, or call to a vararg function!
        CalledFunc->getFunctionType()->isVarArg()) return false;
  
-  // The inliner does not know how to inline through calls with operand bundles.
-  if (CS.hasOperandBundles())
-    return false;
+  // The inliner does not know how to inline through calls with operand bundles
+  // in general ...
+  if (CS.hasOperandBundles()) {
+    // ... but it knows how to inline through "deopt" operand bundles.
+    bool CanInline =
+        CS.getNumOperandBundles() == 1 &&
+        CS.getOperandBundleAt(0).getTagID() == LLVMContext::OB_deopt;
+    if (!CanInline)
+      return false;
+  }
  
    // If the call to the callee cannot throw, set the 'nounwind' flag on any
    // calls that we inline.
@@ -1138,6 +1158,61 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
        HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
                                &*FirstNewBlock, IFI);
  
+    if (CS.hasOperandBundles()) {
+      auto ParentDeopt = CS.getOperandBundleAt(0);
+      assert(ParentDeopt.getTagID() == LLVMContext::OB_deopt &&
+             "Checked on entry!");
+
+      SmallVector<OperandBundleDef, 2> OpDefs;
+
+      for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+        Instruction *I = VH;
+
+        OpDefs.clear();
+
+        CallSite ICS(I);
+        OpDefs.reserve(ICS.getNumOperandBundles());
+
+        for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
+          auto ChildOB = ICS.getOperandBundleAt(i);
+          if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+            // If the inlined call has other operand bundles, let them be
+            OpDefs.emplace_back(ChildOB);
+            continue;
+          }
+
+          // It may be useful to separate this logic (of handling operand
+          // bundles) out to a separate "policy" component if this gets crowded.
+          // Prepend the parent's deoptimization continuation to the newly
+          // inlined call's deoptimization continuation.
+          std::vector<Value *> MergedDeoptArgs;
+          MergedDeoptArgs.reserve(ParentDeopt.Inputs.size() +
+                                  ChildOB.Inputs.size());
+
+          MergedDeoptArgs.insert(MergedDeoptArgs.end(),
+                                 ParentDeopt.Inputs.begin(),
+                                 ParentDeopt.Inputs.end());
+          MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
+                                 ChildOB.Inputs.end());
+
+          OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
+        }
+
+        Instruction *NewI = nullptr;
+        if (isa<CallInst>(I))
+          NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+        else
+          NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
+
+        // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+        // this even if the call returns void.
+        I->replaceAllUsesWith(NewI);
+
+        VH = nullptr;
+        I->eraseFromParent();
+      }
+    }
+
      // Update the callgraph if requested.
      if (IFI.CG)
        UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
diff --git a/test/Transforms/Inline/deopt-bundles.ll b/test/Transforms/Inline/deopt-bundles.ll

new file mode 100644 (file)

index 0000000..91d4690
--- /dev/null
+++ b/test/Transforms/Inline/deopt-bundles.ll
@@ -0,0 +1,97 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare void @f()
+declare i32 @g()
+
+define i32 @callee_0() alwaysinline {
+ entry:
+  call void @f()
+  ret i32 2
+}
+
+define i32 @caller_0() {
+; CHECK-LABEL: @caller_0(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 2
+  %x = call i32 @callee_0() [ "deopt"(i32 5) ]
+  ret i32 %x
+}
+
+define i32 @callee_1() alwaysinline {
+ entry:
+  call void @f() [ "deopt"() ]
+  call void @f() [ "deopt"(i32 0, i32 1) ]
+  call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 2
+}
+
+define i32 @caller_1() {
+; CHECK-LABEL: @caller_1(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5) ]
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5, i32 0, i32 1) ]
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT:  ret i32 2
+
+  %x = call i32 @callee_1() [ "deopt"(i32 5) ]
+  ret i32 %x
+}
+
+define i32 @callee_2() alwaysinline {
+ entry:
+  %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 %v
+}
+
+define i32 @caller_2(i32 %val) {
+; CHECK-LABEL: @caller_2(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT:   [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT:   ret i32 [[RVAL]]
+  %x = call i32 @callee_2() [ "deopt"(i32 %val) ]
+  ret i32 %x
+}
+
+define i32 @callee_3() alwaysinline {
+ entry:
+  %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 %v
+}
+
+define i32 @caller_3() personality i8 3 {
+; CHECK-LABEL: @caller_3(
+ entry:
+  %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+
+ normal:
+  ret i32 %x
+
+ unwind:
+  %cleanup = landingpad i8 cleanup
+  ret i32 101
+}
+
+define i32 @callee_4() alwaysinline personality i8 3 {
+ entry:
+  %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+  ret i32 %v
+
+ unwind:
+  %cleanup = landingpad i8 cleanup
+  ret i32 100
+}
+
+define i32 @caller_4() {
+; CHECK-LABEL: @caller_4(
+ entry:
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+  %x = call i32 @callee_4() [ "deopt"(i32 7) ]
+  ret i32 %x
+}
author	Sanjoy Das <sanjoy@playingwithpointers.com>
	Wed, 18 Nov 2015 06:23:38 +0000 (06:23 +0000)
committer	Sanjoy Das <sanjoy@playingwithpointers.com>
	Wed, 18 Nov 2015 06:23:38 +0000 (06:23 +0000)
docs/LangRef.rst		patch \| blob \| history
include/llvm/IR/InstrTypes.h		patch \| blob \| history
include/llvm/IR/Instructions.h		patch \| blob \| history
include/llvm/Transforms/Utils/Cloning.h		patch \| blob \| history
lib/IR/Instructions.cpp		patch \| blob \| history
lib/Transforms/Utils/CloneFunction.cpp		patch \| blob \| history
lib/Transforms/Utils/InlineFunction.cpp		patch \| blob \| history
test/Transforms/Inline/deopt-bundles.ll	[new file with mode: 0644]	patch \| blob