From dd7e83737491b14ebf98e09fa6cb9b515f9f2e3e Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Sun, 19 Dec 2010 18:22:06 +0000
Subject: [PATCH] fix another miscompile in the llvm.sadd formation logic: it
 wasn't checking to see if the high bits of the original add result were dead.
 Inserting a smaller add and zexting back to that size is not good enough.

This is likely to be the fix for 8816.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122177 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCompares.cpp       | 43 +++++++++++++++++--
 test/Transforms/InstCombine/overflow.ll       | 23 +++++++++-
 2 files changed, 61 insertions(+), 5 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 376a38edda3..11426863f61 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1585,6 +1585,11 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
 
 /// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
 ///   I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+///   sum = a + b
+///   if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
 static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
                                           ConstantInt *CI2, ConstantInt *CI1,
                                           InstCombiner::BuilderTy *Builder) {
@@ -1595,9 +1600,41 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   
   // In order to eliminate the add-with-constant, the compare can be its only
   // use.
-  Value *AddWithCst = I.getOperand(0);
+  Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
   if (!AddWithCst->hasOneUse()) return 0;
-
+  
+  // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+  if (!CI2->getValue().isPowerOf2()) return 0;
+  unsigned NewWidth = CI2->getValue().countTrailingZeros();
+  if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+    
+  // The width of the new add formed is 1 more than the bias.
+  ++NewWidth;
+  
+  // Check to see that CI1 is an all-ones value with NewWidth bits.
+  if (CI1->getBitWidth() == NewWidth ||
+      CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+    return 0;
+  
+  // In order to replace the original add with a narrower 
+  // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+  // and truncates that discard the high bits of the add.  Verify that this is
+  // the case.
+  Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+  for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
+       UI != E; ++UI) {
+    if (*UI == AddWithCst) continue;
+    
+    // Only accept truncates for now.  We would really like a nice recursive
+    // predicate like SimplifyDemandedBits, but which goes downwards the use-def
+    // chain to see which bits of a value are actually demanded.  If the
+    // original add had another add which was then immediately truncated, we
+    // could still do the transformation.
+    TruncInst *TI = dyn_cast<TruncInst>(*UI);
+    if (TI == 0 ||
+        TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+  }
+  
   const IntegerType *WideType = cast<IntegerType>(CI1->getType());
   unsigned WideWidth = WideType->getBitWidth();
   unsigned NarrowWidth = WideWidth / 2;
@@ -1630,8 +1667,6 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   // If the pattern matches, truncate the inputs to the narrower type and
   // use the sadd_with_overflow intrinsic to efficiently compute both the
   // result and the overflow bit.
-  Instruction *OrigAdd =
-  cast<Instruction>(cast<Instruction>(I.getOperand(0))->getOperand(0));
   Builder->SetInsertPoint(OrigAdd->getParent(),
                           BasicBlock::iterator(OrigAdd));
   
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
index e54d315cd04..da20e2c983d 100644
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -11,7 +11,7 @@ entry:
   %conv2 = sext i32 %b to i64
   %add = add nsw i64 %conv2, %conv
   %add.off = add i64 %add, 2147483648
-; CHECK: llvm.sadd.with.overflow
+; CHECK: llvm.sadd.with.overflow.i32
   %0 = icmp ugt i64 %add.off, 4294967295
   br i1 %0, label %if.then, label %if.end
 
@@ -53,3 +53,24 @@ if.end:
   ret i32 %conv9
 }
 
+; CHECK: test3
+; This is illegal to transform because the high bits of the original add are
+; live out.
+define i64 @test3(i32 %a, i32 %b) nounwind ssp {
+entry:
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %b to i64
+  %add = add nsw i64 %conv2, %conv
+  %add.off = add i64 %add, 2147483648
+; CHECK-NOT: llvm.sadd.with.overflow
+  %0 = icmp ugt i64 %add.off, 4294967295
+  br i1 %0, label %if.then, label %if.end
+
+if.then:
+  %call = tail call i32 (...)* @throwAnExceptionOrWhatever() nounwind
+  br label %if.end
+
+if.end:
+  ret i64 %add
+; CHECK: ret i64
+}
-- 
2.34.1