From dd7e83737491b14ebf98e09fa6cb9b515f9f2e3e Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 19 Dec 2010 18:22:06 +0000 Subject: [PATCH] fix another miscompile in the llvm.sadd formation logic: it wasn't checking to see if the high bits of the original add result were dead. Inserting a smaller add and zexting back to that size is not good enough. This is likely to be the fix for 8816. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122177 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCompares.cpp | 43 +++++++++++++++++-- test/Transforms/InstCombine/overflow.ll | 23 +++++++++- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 376a38edda3..11426863f61 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1585,6 +1585,11 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { /// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: /// I = icmp ugt (add (add A, B), CI2), CI1 +/// If this is of the form: +/// sum = a + b +/// if (sum+128 >u 255) +/// Then replace it with llvm.sadd.with.overflow.i8. +/// static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, ConstantInt *CI2, ConstantInt *CI1, InstCombiner::BuilderTy *Builder) { @@ -1595,9 +1600,41 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // In order to eliminate the add-with-constant, the compare can be its only // use. - Value *AddWithCst = I.getOperand(0); + Instruction *AddWithCst = cast(I.getOperand(0)); if (!AddWithCst->hasOneUse()) return 0; - + + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. + if (!CI2->getValue().isPowerOf2()) return 0; + unsigned NewWidth = CI2->getValue().countTrailingZeros(); + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + + // The width of the new add formed is 1 more than the bias. + ++NewWidth; + + // Check to see that CI1 is an all-ones value with NewWidth bits. + if (CI1->getBitWidth() == NewWidth || + CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) + return 0; + + // In order to replace the original add with a narrower + // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant + // and truncates that discard the high bits of the add. Verify that this is + // the case. + Instruction *OrigAdd = cast(AddWithCst->getOperand(0)); + for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); + UI != E; ++UI) { + if (*UI == AddWithCst) continue; + + // Only accept truncates for now. We would really like a nice recursive + // predicate like SimplifyDemandedBits, but which goes downwards the use-def + // chain to see which bits of a value are actually demanded. If the + // original add had another add which was then immediately truncated, we + // could still do the transformation. + TruncInst *TI = dyn_cast(*UI); + if (TI == 0 || + TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + } + const IntegerType *WideType = cast(CI1->getType()); unsigned WideWidth = WideType->getBitWidth(); unsigned NarrowWidth = WideWidth / 2; @@ -1630,8 +1667,6 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // If the pattern matches, truncate the inputs to the narrower type and // use the sadd_with_overflow intrinsic to efficiently compute both the // result and the overflow bit. - Instruction *OrigAdd = - cast(cast(I.getOperand(0))->getOperand(0)); Builder->SetInsertPoint(OrigAdd->getParent(), BasicBlock::iterator(OrigAdd)); diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll index e54d315cd04..da20e2c983d 100644 --- a/test/Transforms/InstCombine/overflow.ll +++ b/test/Transforms/InstCombine/overflow.ll @@ -11,7 +11,7 @@ entry: %conv2 = sext i32 %b to i64 %add = add nsw i64 %conv2, %conv %add.off = add i64 %add, 2147483648 -; CHECK: llvm.sadd.with.overflow +; CHECK: llvm.sadd.with.overflow.i32 %0 = icmp ugt i64 %add.off, 4294967295 br i1 %0, label %if.then, label %if.end @@ -53,3 +53,24 @@ if.end: ret i32 %conv9 } +; CHECK: test3 +; This is illegal to transform because the high bits of the original add are +; live out. +define i64 @test3(i32 %a, i32 %b) nounwind ssp { +entry: + %conv = sext i32 %a to i64 + %conv2 = sext i32 %b to i64 + %add = add nsw i64 %conv2, %conv + %add.off = add i64 %add, 2147483648 +; CHECK-NOT: llvm.sadd.with.overflow + %0 = icmp ugt i64 %add.off, 4294967295 + br i1 %0, label %if.then, label %if.end + +if.then: + %call = tail call i32 (...)* @throwAnExceptionOrWhatever() nounwind + br label %if.end + +if.end: + ret i64 %add +; CHECK: ret i64 +} -- 2.34.1