Add support to indvars for optimizing sadd.with.overflow.

author Andrew Trick <atrick@apple.com>

Mon, 23 Dec 2013 23:31:49 +0000 (23:31 +0000)

committer Andrew Trick <atrick@apple.com>

Mon, 23 Dec 2013 23:31:49 +0000 (23:31 +0000)
author Andrew Trick <atrick@apple.com>
Mon, 23 Dec 2013 23:31:49 +0000 (23:31 +0000)
committer Andrew Trick <atrick@apple.com>
Mon, 23 Dec 2013 23:31:49 +0000 (23:31 +0000)
diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h

index 7e97e218fb0b63cfe0e93352e598c26c0b53c668..3c3de467c45e5a4f9a19ed38e434e23f43db96f4 100644 (file)
--- a/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -22,6 +22,7 @@
  namespace llvm {
  
  class CastInst;
+class DominatorTree;
  class IVUsers;
  class Loop;
  class LPPassManager;
@@ -31,9 +32,25 @@ class ScalarEvolution;
  /// Interface for visiting interesting IV users that are recognized but not
  /// simplified by this utility.
  class IVVisitor {
+protected:
+  const DominatorTree *DT;
+  bool ShouldSplitOverflowIntrinsics;
+
    virtual void anchor();
  public:
+  IVVisitor(): DT(NULL), ShouldSplitOverflowIntrinsics(false) {}
    virtual ~IVVisitor() {}
+
+  const DominatorTree *getDomTree() const { return DT; }
+
+  bool shouldSplitOverflowInstrinsics() const {
+    return ShouldSplitOverflowIntrinsics;
+  }
+  void setSplitOverflowIntrinsics() {
+    ShouldSplitOverflowIntrinsics = true;
+    assert(DT && "Splitting overflow intrinsics requires a DomTree.");
+  }
+
    virtual void visitCast(CastInst *Cast) = 0;
  };
  
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp

index 235aaaa6f801deca1d28794031c0e5a0fd284959..c291f68bd6343adc545467775103839a61e3c2cf 100644 (file)
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -63,6 +63,9 @@ static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));
  
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+  cl::desc("Reduce live induction variables."));
+
  namespace {
    class IndVarSimplify : public LoopPass {
      LoopInfo        *LI;
@@ -643,8 +646,11 @@ namespace {
      WideIVInfo WI;
  
      WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
-                  const DataLayout *TData) :
-      SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+                  const DataLayout *TData, const DominatorTree *DTree):
+      SE(SCEV), TD(TData) {
+      DT = DTree;
+      WI.NarrowIV = NarrowIV;
+    }
  
      // Implement the interface used by simplifyUsersOfIV.
      virtual void visitCast(CastInst *Cast);
@@ -1114,7 +1120,9 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
        PHINode *CurrIV = LoopPhis.pop_back_val();
  
        // Information about sign/zero extensions of CurrIV.
-      WideIVVisitor WIV(CurrIV, SE, TD);
+      WideIVVisitor WIV(CurrIV, SE, TD, DT);
+      if (ReduceLiveIVs)
+        WIV.setSplitOverflowIntrinsics();
  
        Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
  
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp

index bf3442aeaaad16d7121abce2d1173bc5772ab2f1..d1f6c5c62a2f928df880d5dd2150e2fdd3c321ed 100644 (file)
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -18,12 +18,16 @@
  #include "llvm/Transforms/Utils/SimplifyIndVar.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/IVUsers.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/LoopPass.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"
@@ -75,6 +79,9 @@ namespace {
      void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
      void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
                                bool IsSigned);
+
+    Instruction *splitOverflowIntrinsic(Instruction *IVUser,
+                                        const DominatorTree *DT);
    };
  }
  
@@ -263,6 +270,71 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
    return true;
  }
  
+/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
+/// analysis and optimization.
+///
+/// \return A new value representing the non-overflowing add if possible,
+/// otherwise return the original value.
+Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
+                                                    const DominatorTree *DT) {
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
+  if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
+    return IVUser;
+
+  // Find a branch guarded by the overflow check.
+  BranchInst *Branch = 0;
+  Instruction *AddVal = 0;
+  for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+       UI != E; ++UI) {
+    if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
+      if (ExtractInst->getNumIndices() != 1)
+        continue;
+      if (ExtractInst->getIndices()[0] == 0)
+        AddVal = ExtractInst;
+      else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
+        Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
+    }
+  }
+  if (!AddVal || !Branch)
+    return IVUser;
+
+  BasicBlock *ContinueBB = Branch->getSuccessor(1);
+  if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
+    return IVUser;
+
+  // Check if all users of the add are provably NSW.
+  bool AllNSW = true;
+  for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
+       UI != E; ++UI) {
+    if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
+      BasicBlock *UseBB = UseInst->getParent();
+      if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
+        UseBB = PHI->getIncomingBlock(UI);
+      if (!DT->dominates(ContinueBB, UseBB)) {
+        AllNSW = false;
+        break;
+      }
+    }
+  }
+  if (!AllNSW)
+    return IVUser;
+
+  // Go for it...
+  IRBuilder<> Builder(IVUser);
+  Instruction *AddInst = dyn_cast<Instruction>(
+    Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
+
+  // The caller expects the new add to have the same form as the intrinsic. The
+  // IV operand position must be the same.
+  assert((AddInst->getOpcode() == Instruction::Add &&
+          AddInst->getOperand(0) == II->getOperand(0)) &&
+         "Bad add instruction created from overflow intrinsic.");
+
+  AddVal->replaceAllUsesWith(AddInst);
+  DeadInsts.push_back(AddVal);
+  return AddInst;
+}
+
  /// pushIVUsers - Add all uses of Def to the current IV's worklist.
  ///
  static void pushIVUsers(
@@ -334,8 +406,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
    while (!SimpleIVUsers.empty()) {
      std::pair<Instruction*, Instruction*> UseOper =
        SimpleIVUsers.pop_back_val();
+    Instruction *UseInst = UseOper.first;
+
      // Bypass back edges to avoid extra work.
-    if (UseOper.first == CurrIV) continue;
+    if (UseInst == CurrIV) continue;
+
+    if (V && V->shouldSplitOverflowInstrinsics()) {
+      UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
+      if (!UseInst)
+        continue;
+    }
  
      Instruction *IVOperand = UseOper.second;
      for (unsigned N = 0; IVOperand; ++N) {
diff --git a/test/Transforms/IndVarSimplify/overflowcheck.ll b/test/Transforms/IndVarSimplify/overflowcheck.ll

new file mode 100644 (file)

index 0000000..2603f36
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/overflowcheck.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; CHECK-LABEL: @addwithoverflow
+; CHECK-LABEL: loop1:
+; CHECK-NOT: zext
+; CHECK: add nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop2:
+; CHECK-NOT: extractvalue
+; CHECK: add nuw nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop3:
+; CHECK-NOT: extractvalue
+; CHECK: ret
+define i64 @addwithoverflow(i32 %n, i64* %a) {
+entry:
+  br label %loop0
+
+loop0:
+  %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ]
+  %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ]
+  %bc = icmp ult i32 %i, %n
+  br i1 %bc, label %loop1, label %exit
+
+loop1:
+  %zxt = zext i32 %i to i64
+  %ofs = shl nuw nsw i64 %zxt, 3
+  %gep = getelementptr i64* %a, i64 %zxt
+  %v = load i64* %gep, align 8
+  %truncv = trunc i64 %v to i32
+  %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
+  %ovflows = extractvalue { i32, i1 } %adds, 1
+  br i1 %ovflows, label %exit, label %loop2
+
+loop2:
+  %addsval = extractvalue { i32, i1 } %adds, 0
+  %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
+  %i1check = extractvalue { i32, i1 } %i1, 1
+  br i1 %i1check, label %exit, label %loop3
+
+loop3:
+  %i1val = extractvalue { i32, i1 } %i1, 0
+  %test = icmp slt i32 %i1val, %n
+  br i1 %test, label %return, label %loop0
+
+return:
+  %ret = zext i32 %addsval to i64
+  ret i64 %ret
+
+exit:
+  unreachable
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
author	Andrew Trick <atrick@apple.com>
	Mon, 23 Dec 2013 23:31:49 +0000 (23:31 +0000)
committer	Andrew Trick <atrick@apple.com>
	Mon, 23 Dec 2013 23:31:49 +0000 (23:31 +0000)
include/llvm/Transforms/Utils/SimplifyIndVar.h		patch | blob | history
lib/Transforms/Scalar/IndVarSimplify.cpp		patch | blob | history
lib/Transforms/Utils/SimplifyIndVar.cpp		patch | blob | history
test/Transforms/IndVarSimplify/overflowcheck.ll	[new file with mode: 0644]	patch | blob