namespace llvm {
class CastInst;
+class DominatorTree;
class IVUsers;
class Loop;
class LPPassManager;
/// Interface for visiting interesting IV users that are recognized but not
/// simplified by this utility.
class IVVisitor {
+protected:
+ const DominatorTree *DT;
+ bool ShouldSplitOverflowIntrinsics;
+
virtual void anchor();
public:
+ IVVisitor(): DT(NULL), ShouldSplitOverflowIntrinsics(false) {}
virtual ~IVVisitor() {}
+
+ const DominatorTree *getDomTree() const { return DT; }
+
+ bool shouldSplitOverflowInstrinsics() const {
+ return ShouldSplitOverflowIntrinsics;
+ }
+ void setSplitOverflowIntrinsics() {
+ ShouldSplitOverflowIntrinsics = true;
+ assert(DT && "Splitting overflow intrinsics requires a DomTree.");
+ }
+
virtual void visitCast(CastInst *Cast) = 0;
};
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+ cl::desc("Reduce live induction variables."));
+
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
WideIVInfo WI;
WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
- const DataLayout *TData) :
- SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+ const DataLayout *TData, const DominatorTree *DTree):
+ SE(SCEV), TD(TData) {
+ DT = DTree;
+ WI.NarrowIV = NarrowIV;
+ }
// Implement the interface used by simplifyUsersOfIV.
virtual void visitCast(CastInst *Cast);
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- WideIVVisitor WIV(CurrIV, SE, TD);
+ WideIVVisitor WIV(CurrIV, SE, TD, DT);
+ if (ReduceLiveIVs)
+ WIV.setSplitOverflowIntrinsics();
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+
+ Instruction *splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT);
};
}
return true;
}
+/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
+/// analysis and optimization.
+///
+/// \return A new value representing the non-overflowing add if possible,
+/// otherwise return the original value.
+Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
+ if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
+ return IVUser;
+
+ // Find a branch guarded by the overflow check.
+ BranchInst *Branch = 0;
+ Instruction *AddVal = 0;
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI) {
+ if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
+ if (ExtractInst->getNumIndices() != 1)
+ continue;
+ if (ExtractInst->getIndices()[0] == 0)
+ AddVal = ExtractInst;
+ else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
+ Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
+ }
+ }
+ if (!AddVal || !Branch)
+ return IVUser;
+
+ BasicBlock *ContinueBB = Branch->getSuccessor(1);
+ if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
+ return IVUser;
+
+ // Check if all users of the add are provably NSW.
+ bool AllNSW = true;
+ for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
+ UI != E; ++UI) {
+ if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
+ BasicBlock *UseBB = UseInst->getParent();
+ if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
+ UseBB = PHI->getIncomingBlock(UI);
+ if (!DT->dominates(ContinueBB, UseBB)) {
+ AllNSW = false;
+ break;
+ }
+ }
+ }
+ if (!AllNSW)
+ return IVUser;
+
+ // Go for it...
+ IRBuilder<> Builder(IVUser);
+ Instruction *AddInst = dyn_cast<Instruction>(
+ Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
+
+ // The caller expects the new add to have the same form as the intrinsic. The
+ // IV operand position must be the same.
+ assert((AddInst->getOpcode() == Instruction::Add &&
+ AddInst->getOperand(0) == II->getOperand(0)) &&
+ "Bad add instruction created from overflow intrinsic.");
+
+ AddVal->replaceAllUsesWith(AddInst);
+ DeadInsts.push_back(AddVal);
+ return AddInst;
+}
+
/// pushIVUsers - Add all uses of Def to the current IV's worklist.
///
static void pushIVUsers(
while (!SimpleIVUsers.empty()) {
std::pair<Instruction*, Instruction*> UseOper =
SimpleIVUsers.pop_back_val();
+ Instruction *UseInst = UseOper.first;
+
// Bypass back edges to avoid extra work.
- if (UseOper.first == CurrIV) continue;
+ if (UseInst == CurrIV) continue;
+
+ if (V && V->shouldSplitOverflowInstrinsics()) {
+ UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
+ if (!UseInst)
+ continue;
+ }
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
--- /dev/null
+; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; CHECK-LABEL: @addwithoverflow
+; CHECK-LABEL: loop1:
+; CHECK-NOT: zext
+; CHECK: add nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop2:
+; CHECK-NOT: extractvalue
+; CHECK: add nuw nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop3:
+; CHECK-NOT: extractvalue
+; CHECK: ret
+define i64 @addwithoverflow(i32 %n, i64* %a) {
+entry:
+ br label %loop0
+
+loop0:
+ %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ]
+ %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ]
+ %bc = icmp ult i32 %i, %n
+ br i1 %bc, label %loop1, label %exit
+
+loop1:
+ %zxt = zext i32 %i to i64
+ %ofs = shl nuw nsw i64 %zxt, 3
+ %gep = getelementptr i64* %a, i64 %zxt
+ %v = load i64* %gep, align 8
+ %truncv = trunc i64 %v to i32
+ %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
+ %ovflows = extractvalue { i32, i1 } %adds, 1
+ br i1 %ovflows, label %exit, label %loop2
+
+loop2:
+ %addsval = extractvalue { i32, i1 } %adds, 0
+ %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
+ %i1check = extractvalue { i32, i1 } %i1, 1
+ br i1 %i1check, label %exit, label %loop3
+
+loop3:
+ %i1val = extractvalue { i32, i1 } %i1, 0
+ %test = icmp slt i32 %i1val, %n
+ br i1 %test, label %return, label %loop0
+
+return:
+ %ret = zext i32 %addsval to i64
+ ret i64 %ret
+
+exit:
+ unreachable
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)