From: Sanjoy Das
Date: Fri, 10 Apr 2015 21:07:09 +0000 (+0000)
Subject: [InstCombine][CodeGenPrep] Create llvm.uadd.with.overflow in CGP.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8aca90e5b6322f60c631e635083e877caac4c590;p=oota-llvm.git

[InstCombine][CodeGenPrep] Create llvm.uadd.with.overflow in CGP.

Summary:
This change moves creating calls to `llvm.uadd.with.overflow` from
InstCombine to CodeGenPrep.  Combining overflow check patterns into
calls to the said intrinsic in InstCombine inhibits optimization because
it introduces an intrinsic call that not all other transforms and
analyses understand.

Depends on D8888.
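For illustration, the idiom being recognized and the form it is rewritten
into look roughly like this (a hand-written IR sketch, not taken from the
patch; the result names mirror the strings used in CombineUAddWithOverflow
below):

    ; a hand-written unsigned overflow check ...
    %add = add i64 %b, %a
    %cmp = icmp ult i64 %add, %a    ; true iff a + b wrapped around

    ; ... becomes, after CodeGenPrepare:
    %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %b, i64 %a)
    %uadd = extractvalue { i64, i1 } %uadd.overflow, 0      ; the sum
    %overflow = extractvalue { i64, i1 } %uadd.overflow, 1  ; the wrap bit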
Reviewers: majnemer, atrick

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D8889

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234638 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index f94e1057689..41154e6441a 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -295,6 +295,9 @@ template <typename Class> struct bind_ty {
 /// \brief Match a value, capturing it if we match.
 inline bind_ty<Value> m_Value(Value *&V) { return V; }
 
+/// \brief Match an instruction, capturing it if we match.
+inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
 /// \brief Match a binary operator, capturing it if we match.
 inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
 
@@ -1103,6 +1106,52 @@ m_UnordFMax(const LHS &L, const RHS &R) {
   return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
 }
 
+//===----------------------------------------------------------------------===//
+// Matchers for overflow check patterns: e.g. (a + b) u< a
+//
+
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+struct UAddWithOverflow_match {
+  LHS_t L;
+  RHS_t R;
+  Sum_t S;
+
+  UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
+      : L(L), R(R), S(S) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    Value *ICmpLHS, *ICmpRHS;
+    ICmpInst::Predicate Pred;
+    if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
+      return false;
+
+    Value *AddLHS, *AddRHS;
+    auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
+
+    // (a + b) u< a, (a + b) u< b
+    if (Pred == ICmpInst::ICMP_ULT)
+      if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+
+    // a >u (a + b), b >u (a + b)
+    if (Pred == ICmpInst::ICMP_UGT)
+      if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+    return false;
+  }
+};
+
+/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+///
+/// S is matched to the addition whose result is being checked for overflow, and
+/// L and R are matched to the LHS and RHS of S.
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
+m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
+  return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
+}
+
 /// \brief Match an 'unordered' floating point minimum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 8b48186a9c4..d101f52b6ec 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -747,13 +747,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
   return SinkCast(CI);
 }
 
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+  Value *A, *B;
+  Instruction *AddI;
+  if (!match(CI,
+             m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+    return false;
+
+  Type *Ty = AddI->getType();
+  if (!isa<IntegerType>(Ty))
+    return false;
+
+  // We don't want to move around uses of condition values this late, so we
+  // check if it is legal to create the call to the intrinsic in the basic
+  // block containing the icmp:
+
+  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+    return false;
+
+#ifndef NDEBUG
+  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+  // for now:
+  if (AddI->hasOneUse())
+    assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+  Module *M = CI->getParent()->getParent()->getParent();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+  auto *UAddWithOverflow =
+      CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+  auto *Overflow =
+      ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+  CI->replaceAllUsesWith(Overflow);
+  AddI->replaceAllUsesWith(UAdd);
+  CI->eraseFromParent();
+  AddI->eraseFromParent();
+  return true;
+}
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
 /// the number of virtual registers that must be created and coalesced. This is
 /// a clear win except on targets with multiple condition code registers
 /// (PowerPC), where it might lose; some adjustment may be wanted there.
 ///
 /// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
   BasicBlock *DefBB = CI->getParent();
 
   /// InsertedCmp - Only insert a cmp in each block once.
@@ -802,6 +849,16 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
   return MadeChange;
 }
 
+static bool OptimizeCmpExpression(CmpInst *CI) {
+  if (SinkCmpExpression(CI))
+    return true;
+
+  if (CombineUAddWithOverflow(CI))
+    return true;
+
+  return false;
+}
+
 /// isExtractBitsCandidateUse - Check if the candidates could
 /// be combined with shift instruction, which includes:
 /// 1. Truncate instruction
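A note on the insertion-point logic above, with a minimal sketch
(illustrative IR; the value names are invented for the example): when the
add's only user is the icmp, the call is created at the icmp, so the pair
may live in different blocks; when the add has other users, the early
return has already guaranteed both are in the same block, and inserting at
the add keeps the new definition above every old user:

    entry:
      %add = add i64 %b, %a          ; two users: the store and the icmp
      store i64 %add, i64* %ptr
      %cmp = icmp ugt i64 %b, %add   ; same block, so the rewrite is still legal

    ; becomes (call created where the add was):
    entry:
      %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %b, i64 %a)
      %uadd = extractvalue { i64, i1 } %uadd.overflow, 0
      %overflow = extractvalue { i64, i1 } %uadd.overflow, 1
      store i64 %uadd, i64* %ptr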
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 17d2634d66f..e084d3d8b03 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2109,35 +2109,6 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
 }
 
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
-                                     InstCombiner &IC) {
-  // Don't bother doing this transformation for pointers, don't do it for
-  // vectors.
-  if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
-
-  // If the add is a constant expr, then we don't bother transforming it.
-  Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
-  if (!OrigAdd) return nullptr;
-
-  Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
-  // Put the new code above the original add, in case there are any uses of the
-  // add between the add and the compare.
-  InstCombiner::BuilderTy *Builder = IC.Builder;
-  Builder->SetInsertPoint(OrigAdd);
-
-  Module *M = I.getParent()->getParent()->getParent();
-  Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-  CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
-  Value *Add = Builder->CreateExtractValue(Call, 0);
-
-  IC.ReplaceInstUsesWith(*OrigAdd, Add);
-
-  // The original icmp gets replaced with the overflow value.
-  return ExtractValueInst::Create(Call, 1, "uadd.overflow");
-}
-
 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
                                          Value *RHS, Instruction &OrigI,
                                          Value *&Result, Constant *&Overflow) {
@@ -3539,21 +3510,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
     }
 
-    // (a+b) <u a  --> llvm.uadd.with.overflow.
-    // (a+b) <u b  --> llvm.uadd.with.overflow.
-    if (I.getPredicate() == ICmpInst::ICMP_ULT &&
-        match(Op0, m_Add(m_Value(A), m_Value(B))) &&
-        (Op1 == A || Op1 == B))
-      if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
-        return R;
-
-    // a >u (a+b)  --> llvm.uadd.with.overflow.
-    // b >u (a+b)  --> llvm.uadd.with.overflow.
-    if (I.getPredicate() == ICmpInst::ICMP_UGT &&
-        match(Op1, m_Add(m_Value(A), m_Value(B))) &&
-        (Op0 == A || Op0 == B))
-      if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
-        return R;
+    Instruction *AddI = nullptr;
+    if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+                                     m_Instruction(AddI))) &&
+        isa<IntegerType>(A->getType())) {
+      Value *Result;
+      Constant *Overflow;
+      if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+                                Overflow)) {
+        ReplaceInstUsesWith(*AddI, Result);
+        return ReplaceInstUsesWith(I, Overflow);
+      }
+    }
 
     // (zext a) * (zext b)  --> llvm.umul.with.overflow.
     if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
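For reference, the four commuted shapes that the single m_UAddWithOverflow
match now covers, replacing the two hand-rolled predicate checks deleted
above (illustrative IR):

    %add = add i32 %a, %b
    %c1 = icmp ult i32 %add, %a    ; (a + b) u< a
    %c2 = icmp ult i32 %add, %b    ; (a + b) u< b
    %c3 = icmp ugt i32 %a, %add    ; a u> (a + b)
    %c4 = icmp ugt i32 %b, %add    ; b u> (a + b)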
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
index 9c24be4289f..44b587af3aa 100644
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -4,43 +4,26 @@
 define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
-  %add4 = add i32 %x, %sum
-  %cmp = icmp ult i32 %add4, %x
-  %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %add4, %inc
-  ret i32 %z.0
-}
-
-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
 ; CHECK: movl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: adcl $0
 ; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
-  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
-  %0 = extractvalue { i32, i1 } %uadd, 0
-  %cmp = extractvalue { i32, i1 } %uadd, 1
+  %add4 = add i32 %x, %sum
+  %cmp = icmp ult i32 %add4, %x
   %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %0, %inc
+  %z.0 = add i32 %add4, %inc
   ret i32 %z.0
 }
 
 ;
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
 entry:
   %cmp = icmp ugt i32 %x, %y
   %dec = sext i1 %cmp to i32
   %dec.res = add nsw i32 %dec, %res
   ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
 ; CHECK: cmpl
 ; CHECK: sbbl
 ; CHECK: ret
 }
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll b/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
new file mode 100644
index 00000000000..1e413191e06
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
@@ -0,0 +1,74 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK-LABEL: @test1(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: extractvalue
+; CHECK: extractvalue
+; CHECK: select
+define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: llvm.uadd.with.overflow
+; CHECK: next
+define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  store i64 %add, i64* %ptr
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
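As a reading aid, roughly what @test1 above should look like after the
-codegenprepare run (a sketch inferred from CombineUAddWithOverflow, not
copied from actual opt output; exact value numbering may differ):

    define i64 @test1(i64 %a, i64 %b) nounwind ssp {
    entry:
      %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %b, i64 %a)
      %uadd = extractvalue { i64, i1 } %uadd.overflow, 0
      %overflow = extractvalue { i64, i1 } %uadd.overflow, 1
      %Q = select i1 %overflow, i64 %b, i64 42
      ret i64 %Q
    }

@test4 and @test5 differ only in the extra store: once the compare has
been sunk into %next, the add in @test5 still has a second user in a
different block, which appears to be exactly the case the legality check
in CombineUAddWithOverflow rejects.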
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
index 3eddc80a704..4c0a8a2c0df 100644
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -97,39 +97,6 @@ if.end:                                           ; preds = %entry
 ; CHECK: ret i8
 }
 
-; CHECK-LABEL: @test5(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test5(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %a
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test6(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test6(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %b
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test7(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test7(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ugt i64 %b, %add
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
 ; CHECK-LABEL: @test8(
 ; PR11438
 ; This is @test1, but the operands are not sign-extended.  Make sure