/// entry here.
std::vector<ArgInfo> ArgumentWeights;
+ /// PointerArgPairWeights - Weights to use when giving an inline bonus to
+ /// a call site due to correlated pairs of pointers.
+ DenseMap<std::pair<unsigned, unsigned>, unsigned> PointerArgPairWeights;
+
/// countCodeReductionForConstant - Figure out an approximation for how
/// many instructions will be constant folded if the specified value is
/// constant.
unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics,
Value *V);
+ /// countCodeReductionForPointerPair - Count the bonus to apply to an
+ /// inline call site where a pair of arguments are pointers and one
+ /// argument is a constant offset from the other. The idea is to
+ /// recognize a common C++ idiom where a begin and end iterator are
+ /// actually pointers, and many operations on the pair of them will be
+ /// constants if the function is called with arguments that have
+ /// a constant offset.
+ void countCodeReductionForPointerPair(
+ const CodeMetrics &Metrics,
+ DenseMap<Value *, unsigned> &PointerArgs,
+ Value *V, unsigned ArgIdx);
+
/// analyzeFunction - Add information about the specified function
/// to the current structure.
void analyzeFunction(Function *F, const TargetData *TD);
return Reduction + (CanSROAAlloca ? SROAReduction : 0);
}
+// Walk the values derived from argument V (index ArgIdx), recording them in
+// PointerArgs and crediting PointerArgPairWeights for each sub/icmp that pairs
+// V's chain with a chain recorded for an earlier pointer argument.
+void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair(
+ const CodeMetrics &Metrics, DenseMap<Value *, unsigned> &PointerArgs,
+ Value *V, unsigned ArgIdx) {
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ // NOTE: intentionally shadows the parameter 'V'; each iteration examines
+ // one value derived (via constant GEPs or casts) from the original argument.
+ Value *V = Worklist.pop_back_val();
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI){
+ Instruction *I = cast<Instruction>(*UI);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (!GEP->hasAllConstantIndices())
+ continue;
+ // Unless the GEP is in-bounds, some comparisons will be non-constant.
+ // Fortunately, the real-world cases where this occurs use in-bounds
+ // GEPs, and so we restrict the optimization to them here.
+ if (!GEP->isInBounds())
+ continue;
+
+ // Constant indices just change the constant offset. Add the resulting
+ // value both to our worklist for this argument, and to the set of
+ // viable paired values with future arguments.
+ PointerArgs[GEP] = ArgIdx;
+ Worklist.push_back(GEP);
+ continue;
+ }
+
+ // Track pointer through casts. Even when the result is not a pointer, it
+ // remains a constant relative to constants derived from other constant
+ // pointers.
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ PointerArgs[CI] = ArgIdx;
+ Worklist.push_back(CI);
+ continue;
+ }
+
+ // There are two instructions which produce a strict constant value when
+ // applied to two related pointer values. Ignore everything else.
+ if (!isa<ICmpInst>(I) && I->getOpcode() != Instruction::Sub)
+ continue;
+ assert(I->getNumOperands() == 2);
+
+ // Ensure that the two operands are in our set of potentially paired
+ // pointers (or are derived from them).
+ Value *OtherArg = I->getOperand(0);
+ if (OtherArg == V)
+ OtherArg = I->getOperand(1);
+ DenseMap<Value *, unsigned>::const_iterator ArgIt
+ = PointerArgs.find(OtherArg);
+ if (ArgIt == PointerArgs.end())
+ continue;
+ // Arguments are visited in increasing index order, so any value already
+ // recorded in PointerArgs must belong to an earlier argument.
+ assert(ArgIt->second < ArgIdx);
+
+ // Credit the (earlier-arg, this-arg) pair with the code that would fold
+ // away if 'I' became a constant.
+ PointerArgPairWeights[std::make_pair(ArgIt->second, ArgIdx)]
+ += countCodeReductionForConstant(Metrics, I);
+ }
+ } while (!Worklist.empty());
+}
+
/// analyzeFunction - Fill in the current structure with information gleaned
/// from the specified function.
void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
if (Metrics.NumRets==1)
--Metrics.NumInsts;
- // Check out all of the arguments to the function, figuring out how much
- // code can be eliminated if one of the arguments is a constant.
ArgumentWeights.reserve(F->arg_size());
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ DenseMap<Value *, unsigned> PointerArgs;
+ unsigned ArgIdx = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIdx) {
+ // Count how much code can be eliminated if one of the arguments is
+ // a constant or an alloca.
ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I),
countCodeReductionForAlloca(Metrics, I)));
+
+ // If the argument is a pointer, also check for pairs of pointers where
+ // knowing a fixed offset between them allows simplification. This pattern
+ // arises mostly due to STL algorithm patterns where pointers are used as
+ // random access iterators.
+ if (!I->getType()->isPointerTy())
+ continue;
+ PointerArgs[I] = ArgIdx;
+ countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx);
+ }
}
/// NeverInline - returns true if the function should never be inlined into
InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
}
+ const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights
+ = CalleeFI->PointerArgPairWeights;
+ for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I
+ = ArgPairWeights.begin(), E = ArgPairWeights.end();
+ I != E; ++I)
+ if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
+ CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
+ InlineCost -= I->second;
+
// Each argument passed in has a cost at both the caller and the callee
// sides. Measurements show that each argument costs about the same as an
// instruction.
--- /dev/null
+; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s
+
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret i32
+
+; Both GEPs are inbounds with constant indices off the same base, so the
+; begin/end pair passed to @inner1 has a constant offset; the pair bonus must
+; bring the call under -inline-threshold=10 and the call must be inlined.
+ %ptr = alloca i32
+ %ptr1 = getelementptr inbounds i32* %ptr, i32 0
+ %ptr2 = getelementptr inbounds i32* %ptr, i32 42
+ %result = call i32 @inner1(i32* %ptr1, i32* %ptr2)
+ ret i32 %result
+}
+
+define i32 @inner1(i32* %begin, i32* %end) {
+; Computes end - begin and branches on it; when the two pointers differ by
+; a constant offset, the sub and icmp fold to constants, which is exactly
+; what the pointer-pair inline bonus is meant to recognize.
+ %begin.i = ptrtoint i32* %begin to i32
+ %end.i = ptrtoint i32* %end to i32
+ %distance = sub i32 %end.i, %begin.i
+ %icmp = icmp sle i32 %distance, 42
+ br i1 %icmp, label %then, label %else
+
+then:
+ ret i32 3
+
+else:
+ %t = load i32* %begin
+ ret i32 %t
+}
+
+define i32 @outer2(i32* %ptr) {
+; Test that a GEP which lacks the inbounds keyword disables this bonus -- it
+; isn't safe in general, as wrapping changes the behavior of less-than and
+; greater-than comparisons. The call below must therefore NOT be inlined.
+; CHECK: @outer2
+; CHECK: call i32 @inner2
+; CHECK: ret i32
+
+ %ptr1 = getelementptr i32* %ptr, i32 0
+ %ptr2 = getelementptr i32* %ptr, i32 42
+ %result = call i32 @inner2(i32* %ptr1, i32* %ptr2)
+ ret i32 %result
+}
+
+define i32 @inner2(i32* %begin, i32* %end) {
+; Same logic as @inner1: compare the integer distance between the two
+; pointers against 42 and branch on the result.
+ %e = ptrtoint i32* %end to i32
+ %b = ptrtoint i32* %begin to i32
+ %delta = sub i32 %e, %b
+ %cmp = icmp sle i32 %delta, 42
+ br i1 %cmp, label %small, label %big
+
+small:
+ ret i32 3
+
+big:
+ %v = load i32* %begin
+ ret i32 %v
+}