show that no exceptions passes by it. This is normally the case for
the ELF x86-64 abi, but it can be disabled for some compilation
units.
+``noduplicate``
+ This attribute indicates that calls to the function cannot be
+ duplicated. A call to a ``noduplicate`` function may be moved
+ within its parent function, but may not be duplicated within
+ its parent function.
+
+ A function containing a ``noduplicate`` call may still
+ be an inlining candidate, provided that the call is not
+ duplicated by inlining. That implies that the function has
+ internal linkage and only has one call site, so the original
+ call is dead after inlining.
.. _moduleasm:
/// \brief True if this function calls itself.
bool isRecursive;
- /// \brief True if this function contains one or more indirect branches.
- bool containsIndirectBr;
+ /// \brief True if this function cannot be duplicated.
+ ///
+ /// True if this function contains one or more indirect branches, or it contains
+ /// one or more 'noduplicate' instructions.
+ bool notDuplicatable;
/// \brief True if this function calls alloca (in the C sense).
bool usesDynamicAlloca;
unsigned NumRets;
CodeMetrics() : exposesReturnsTwice(false), isRecursive(false),
- containsIndirectBr(false), usesDynamicAlloca(false),
+ notDuplicatable(false), usesDynamicAlloca(false),
NumInsts(0), NumBlocks(0), NumCalls(0),
NumInlineCandidates(0), NumVectorInsts(0),
NumRets(0) {}
Nest, ///< Nested function static chain
NoAlias, ///< Considered to not alias after call
NoCapture, ///< Function creates no aliases of pointer
+ NoDuplicate, ///< Call cannot be duplicated
NoImplicitFloat, ///< Disable implicit floating point insts
NoInline, ///< inline=never
NonLazyBind, ///< Function is called early and/or
.removeAttribute(Attribute::NonLazyBind)
.removeAttribute(Attribute::ReturnsTwice)
.removeAttribute(Attribute::AddressSafety)
- .removeAttribute(Attribute::MinSize);
+ .removeAttribute(Attribute::MinSize)
+ .removeAttribute(Attribute::NoDuplicate);
}
uint64_t Raw() const { return Bits; }
addFnAttr(Attribute::NoUnwind);
}
+ /// @brief Determine if the call cannot be duplicated.
+ bool cannotDuplicate() const {
+ return getFnAttributes().hasAttribute(Attribute::NoDuplicate);
+ }
+ void setCannotDuplicate() {
+ addFnAttr(Attribute::NoDuplicate);
+ }
+
/// @brief True if the ABI mandates (or the user requested) that this
/// function be in a unwind table.
bool hasUWTable() const {
Attribute::get(getContext(), Attribute::NoUnwind));
}
+ /// \brief Determine if the call cannot be duplicated.
+ bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
+ void setCannotDuplicate() {
+ addAttribute(AttributeSet::FunctionIndex,
+ Attribute::get(getContext(), Attribute::NoDuplicate));
+ }
+
/// \brief Determine if the call returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {
if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
+ if (const CallInst *CI = dyn_cast<CallInst>(II))
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ notDuplicatable = true;
+
+ if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II))
+ if (InvI->hasFnAttr(Attribute::NoDuplicate))
+ notDuplicatable = true;
+
++NumInsts;
}
// if someone is using a blockaddress without an indirectbr, and that
// reference somehow ends up in another function or global, we probably
// don't want to inline this function.
- if (isa<IndirectBrInst>(BB->getTerminator()))
- containsIndirectBr = true;
+ notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
// Remember NumInsts for this BB.
NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
bool IsRecursiveCall;
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
+ bool ContainsNoDuplicateCall;
+
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
unsigned NumInstructions, NumVectorInstructions;
CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
: TD(TD), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
- ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
- NumInstructions(0), NumVectorInstructions(0),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
ExposesReturnsTwice = true;
return false;
}
+ if (CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
+ ContainsNoDuplicateCall = true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
switch (II->getIntrinsicID()) {
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
- if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+ &F == CS.getCalledFunction();
+ if (OnlyOneCallAndLocalLinkage)
Cost += InlineConstants::LastCallToStaticBonus;
// If the instruction after the call, or if the normal destination of the
}
}
+ // If this is a noduplicate call, we can still inline as long as
+ // inlining this would cause the removal of the caller (so the instruction
+ // is not actually duplicated, just moved).
+ if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
+ return false;
+
Threshold += VectorBonus;
return Cost < Threshold;
DEBUG_PRINT_STAT(NumInstructionsSimplified);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
+ DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
#undef DEBUG_PRINT_STAT
}
#endif
/// isSafeToClone - Return true if the loop body is safe to clone in practice.
/// Routines that reform the loop CFG and split edges often fail on indirectbr.
bool Loop::isSafeToClone() const {
- // Return false if any loop blocks contain indirectbrs.
+ // Return false if any loop blocks contain indirectbrs, or there are any calls
+ // to noduplicate functions.
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
- if (isa<IndirectBrInst>((*I)->getTerminator()))
+ if (isa<IndirectBrInst>((*I)->getTerminator())) {
return false;
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+ if (II->hasFnAttr(Attribute::NoDuplicate))
+ return false;
+ }
+
+ for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
+ if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ return false;
+ }
+ }
}
return true;
}
KEYWORD(nonlazybind);
KEYWORD(address_safety);
KEYWORD(minsize);
+ KEYWORD(noduplicate);
KEYWORD(type);
KEYWORD(opaque);
case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
+ case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
// Error handling.
case lltok::kw_zeroext:
case lltok::kw_byval: case lltok::kw_nest:
HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
break;
+
case lltok::kw_noreturn: case lltok::kw_nounwind:
case lltok::kw_uwtable: case lltok::kw_returns_twice:
case lltok::kw_noinline: case lltok::kw_readnone:
case lltok::kw_naked: case lltok::kw_nonlazybind:
case lltok::kw_address_safety: case lltok::kw_minsize:
case lltok::kw_alignstack: case lltok::kw_align:
+ case lltok::kw_noduplicate:
HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
break;
}
kw_nonlazybind,
kw_address_safety,
kw_minsize,
+ kw_noduplicate,
kw_type,
kw_opaque,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!isa<IntrinsicInst>(CI))
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ // Blocks with NoDuplicate are modelled as having infinite cost, so they
+ // are never duplicated.
+ return ~0U;
+ else if (!isa<IntrinsicInst>(CI))
Size += 3;
else if (!CI->getType()->isVectorTy())
Size += 1;
if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
return false;
- // Check size of original header and reject loop if it is very big.
+ // Check size of original header and reject loop if it is very big or we can't
+ // duplicate blocks inside it.
{
CodeMetrics Metrics;
Metrics.analyzeBasicBlock(OrigHeader);
+ if (Metrics.notDuplicatable) {
+ DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable"
+ << " instructions: "; L->dump());
+ return false;
+ }
if (Metrics.NumInsts > MAX_HEADER_SIZE)
return false;
}
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- const DataLayout *TD) {
+ bool &NotDuplicatable, const DataLayout *TD) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I, TD);
NumCalls = Metrics.NumInlineCandidates;
+ NotDuplicatable = Metrics.notDuplicatable;
unsigned LoopSize = Metrics.NumInsts;
if (Threshold != NoThreshold) {
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
unsigned NumInlineCandidates;
- unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
+ bool notDuplicatable;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
+ notDuplicatable, TD);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (notDuplicatable) {
+ DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable"
+ << " instructions.\n");
+ return false;
+ }
if (NumInlineCandidates != 0) {
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
+
+ if (Metrics.notDuplicatable) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << L->getHeader()->getName() << ", contents cannot be "
+ << "duplicated!\n");
+ return false;
+ }
}
if (!Props.CanBeUnswitchedCount) {
Result += utostr(getAlignment());
Result += " ";
}
+ if (hasAttribute(Attribute::NoDuplicate))
+ Result += "noduplicate ";
// Trim the trailing space.
assert(!Result.empty() && "Unknown attribute!");
Result.erase(Result.end()-1);
case Attribute::NonLazyBind: return 1U << 31;
case Attribute::AddressSafety: return 1ULL << 32;
case Attribute::MinSize: return 1ULL << 33;
+ case Attribute::NoDuplicate: return 1ULL << 34;
}
llvm_unreachable("Unsupported attribute type");
}
; CHECK-NOT: = alloca
; CHECK: ret i32
}
+
+declare void @barrier() noduplicate
+
+define internal i32 @f() {
+ call void @barrier() noduplicate
+ ret i32 1
+}
+
+define i32 @g() {
+ call void @barrier() noduplicate
+ ret i32 2
+}
+
+define internal i32 @h() {
+ call void @barrier() noduplicate
+ ret i32 3
+}
+
+define i32 @test3() {
+ %b = call i32 @f()
+ ret i32 %b
+}
+
+; The call to @f cannot be inlined as there is another callsite
+; calling @f, and @f contains a noduplicate call.
+;
+; The call to @g cannot be inlined as it has external linkage.
+;
+; The call to @h *can* be inlined.
+
+; CHECK: @test
+define i32 @test() {
+; CHECK: call i32 @f()
+ %a = call i32 @f()
+; CHECK: call i32 @g()
+ %b = call i32 @g()
+; CHECK-NOT: call i32 @h()
+ %c = call i32 @h()
+
+ %d = add i32 %a, %b
+ %e = add i32 %d, %c
+
+ ret i32 %e
+; CHECK: }
+}
; CHECK: }
}
+; In this test we check that block duplication is inhibited by the presence
+; of a function with the 'noduplicate' attribute.
+
+declare void @g()
+declare void @j()
+declare void @k()
+
+; CHECK: define void @h(i32 %p) {
+define void @h(i32 %p) {
+ %x = icmp ult i32 %p, 5
+ br i1 %x, label %l1, label %l2
+
+l1:
+ call void @j()
+ br label %l3
+
+l2:
+ call void @k()
+ br label %l3
+
+l3:
+; CHECK: call void @g() noduplicate
+; CHECK-NOT: call void @g() noduplicate
+ call void @g() noduplicate
+ %y = icmp ult i32 %p, 5
+ br i1 %y, label %l4, label %l5
+
+l4:
+ call void @j()
+ ret void
+
+l5:
+ call void @k()
+ ret void
+; CHECK: }
+}
declare void @g(i32*)
+; CHECK: @test2
+define void @test2() nounwind ssp {
+entry:
+ %array = alloca [20 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, 100
+; CHECK: call void @f
+; CHECK-NOT: call void @f
+ call void @f() noduplicate
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %inc = add nsw i32 %i.0, 1
+ call void @h()
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+; CHECK: }
+}
+
+declare void @f() noduplicate
+declare void @h()
l2: ; preds = %l1
ret i32 0
}
+
+; This should not unroll since the call is 'noduplicate'.
+
+; CHECK: @test2
+define i32 @test2(i8** %P) nounwind ssp {
+entry:
+ br label %l1
+
+l1: ; preds = %l1, %entry
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() noduplicate
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 3
+ br i1 %exitcond, label %l2, label %l1
+
+l2: ; preds = %l1
+ ret i32 0
+; CHECK: }
+}
+
+declare void @f()
-; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
define i32 @test(i32* %A, i1 %C) {
entry:
ret i32 %tmp.13
}
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the noduplicate call.
+
+; CHECK: @test2
+define i32 @test2(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+ call void @decf() noreturn nounwind noduplicate
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+; CHECK: }
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn