From 23e8b715267a64381e2fff8c208da1f24b387b83 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 28 Apr 2008 17:02:21 +0000 Subject: [PATCH] Teach InstCombine's ComputeMaskedBits what SelectionDAG's ComputeMaskedBits knows about cttz, ctlz, and ctpop. Teach SelectionDAG's ComputeMaskedBits what InstCombine's knows about SRem. And teach them both some things about high bits in Mul, UDiv, URem, and Sub. This allows instcombine and dagcombine to eliminate sign-extension operations in several new cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50358 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 157 ++++++++++++++---- .../Scalar/InstructionCombining.cpp | 145 +++++++++++----- test/Transforms/InstCombine/sext-misc.ll | 45 +++++ 3 files changed, 269 insertions(+), 78 deletions(-) create mode 100644 test/Transforms/InstCombine/sext-misc.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 90d64832d71..bfc35068994 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1229,6 +1229,50 @@ void SelectionDAG::ComputeMaskedBits(SDOperand Op, const APInt &Mask, KnownZero = KnownZeroOut; return; } + case ISD::MUL: { + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clear(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes() + + 1, BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero &= Mask; + return; + } + case ISD::UDIV: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be greater than the denominator. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(Op.getOperand(1), + AllOnes, KnownZero2, KnownOne2, Depth+1); + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - KnownOne2.countLeadingZeros()); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } case ISD::SELECT: ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); @@ -1469,47 +1513,94 @@ void SelectionDAG::ComputeMaskedBits(SDOperand Op, const APInt &Mask, KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; + case ISD::SUB: { + if (ConstantSDNode *CLHS = dyn_cast(Op.getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (CLHS->getAPIntValue().isNonNegative()) { + unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, + Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through case ISD::ADD: { - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - unsigned KnownZeroOut = std::min(KnownZero.countTrailingOnes(), - KnownZero2.countTrailingOnes()); - - KnownZero = APInt::getLowBitsSet(BitWidth, KnownZeroOut); - KnownOne = APInt(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); return; } - case ISD::SUB: { - ConstantSDNode *CLHS = dyn_cast(Op.getOperand(0)); - if (!CLHS) return; - - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. - if (CLHS->getAPIntValue().isNonNegative()) { - unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); - - // If all of the MaskV bits are known to be zero, then we know the output - // top bits are zero, because we now know that the output is from [0-C]. - if ((KnownZero & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); - // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; - KnownOne = APInt(BitWidth, 0); // No one bits known. - } else { - KnownZero = KnownOne = APInt(BitWidth, 0); // Otherwise, nothing known. + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast(Op.getOperand(1))) { + APInt RA = Rem->getAPIntValue(); + if (RA.isPowerOf2() || (-RA).isPowerOf2()) { + APInt LowBits = RA.isStrictlyPositive() ? ((RA - 1) | RA) : ~RA; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + + // The sign of a remainder is equal to the sign of the first + // operand (zero being positive). + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero2 |= ~LowBits; + else if (KnownOne2[BitWidth-1]) + KnownOne2 |= ~LowBits; + + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + return; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast(Op.getOperand(1))) { + APInt RA = Rem->getAPIntValue(); + if (RA.isStrictlyPositive() && RA.isPowerOf2()) { + APInt LowBits = (RA - 1) | RA; + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; } } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; return; } default: diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 6bf06e77ebf..655ab102599 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -700,15 +700,15 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, return; } + KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything. + if (Depth == 6 || Mask == 0) return; // Limit search depth. User *I = dyn_cast(V); if (!I) return; - KnownZero.clear(); KnownOne.clear(); // Don't know anything. APInt KnownZero2(KnownZero), KnownOne2(KnownOne); - switch (getOpcode(I)) { default: break; case Instruction::And: { @@ -759,16 +759,42 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. KnownOne.clear(); unsigned TrailZ = KnownZero.countTrailingOnes() + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes() + + 1, BitWidth) - BitWidth; + TrailZ = std::min(TrailZ, BitWidth); - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); KnownZero &= Mask; return; } + case Instruction::UDiv: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be greater than the denominator. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(I->getOperand(1), + AllOnes, KnownZero2, KnownOne2, Depth+1); + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - KnownOne2.countLeadingZeros()); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } case Instruction::Select: ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1); ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); @@ -900,38 +926,36 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, + Depth+1); - // If all of the MaskV bits are known to be zero, then we know the output - // top bits are zero, because we now know that the output is from [0-C]. - if ((KnownZero & MaskV) == MaskV) { + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); // Top bits known zero. KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; - KnownOne = APInt(BitWidth, 0); // No one bits known. - } else { - KnownZero = KnownOne = APInt(BitWidth, 0); // Otherwise, nothing known. } - return; } } } // fall through case Instruction::Add: { - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - unsigned KnownZeroOut = std::min(KnownZero.countTrailingOnes(), - KnownZero2.countTrailingOnes()); - - KnownZero = APInt::getLowBitsSet(BitWidth, KnownZeroOut); - KnownOne = APInt(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); return; } case Instruction::SRem: @@ -956,7 +980,7 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, } } break; - case Instruction::URem: + case Instruction::URem: { if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { APInt RA = Rem->getValue(); if (RA.isStrictlyPositive() && RA.isPowerOf2()) { @@ -965,19 +989,24 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero |= ~LowBits & Mask; ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; } - } else { - // Since the result is less than or equal to RHS, any leading zero bits - // in RHS must also exist in the result. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); - - uint32_t Leaders = KnownZero2.countLeadingOnes(); - KnownZero |= APInt::getHighBitsSet(BitWidth, Leaders) & Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; break; + } case Instruction::Alloca: case Instruction::Malloc: { @@ -1088,6 +1117,20 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, } break; } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + } + } + break; } } @@ -1232,7 +1275,9 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; switch (I->getOpcode()) { - default: break; + default: + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, @@ -1578,6 +1623,9 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return true; } + // Otherwise just hand the sub off to ComputeMaskedBits to fill in + // the known zeros and ones. + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { @@ -1695,10 +1743,10 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, } } break; - case Instruction::URem: + case Instruction::URem: { if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { APInt RA = Rem->getValue(); - if (RA.isPowerOf2()) { + if (RA.isStrictlyPositive() && RA.isPowerOf2()) { APInt LowBits = (RA - 1) | RA; APInt Mask2 = LowBits & DemandedMask; KnownZero |= ~LowBits & DemandedMask; @@ -1707,19 +1755,26 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, return true; assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; } - } else { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperand(1), AllOnes, - KnownZero2, KnownOne2, Depth+1)) - return true; - - uint32_t Leaders = KnownZero2.countLeadingOnes(); - KnownZero |= APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; } + + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), AllOnes, + KnownZero2, KnownOne2, Depth+1); + uint32_t Leaders = KnownZero2.countLeadingOnes(); + APInt HighZeros = APInt::getHighBitsSet(BitWidth, Leaders); + if (SimplifyDemandedBits(I->getOperand(1), ~HighZeros, + KnownZero2, KnownOne2, Depth+1)) + return true; + + Leaders = std::max(Leaders, + KnownZero2.countLeadingOnes()); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; break; } + } // If the client is only demanding bits that we know, return the known // constant. diff --git a/test/Transforms/InstCombine/sext-misc.ll b/test/Transforms/InstCombine/sext-misc.ll new file mode 100644 index 00000000000..da69e85ff5f --- /dev/null +++ b/test/Transforms/InstCombine/sext-misc.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sext +; RUN: llvm-as < %s | llc -march=x86-64 | not grep movslq +; RUN: llvm-as < %s | llc -march=x86 | not grep sar + +declare i32 @llvm.ctpop.i32(i32) +declare i32 @llvm.ctlz.i32(i32) +declare i32 @llvm.cttz.i32(i32) + +define i64 @foo(i32 %x) { + %t = call i32 @llvm.ctpop.i32(i32 %x) + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @boo(i32 %x) { + %t = call i32 @llvm.ctlz.i32(i32 %x) + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @zoo(i32 %x) { + %t = call i32 @llvm.cttz.i32(i32 %x) + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @coo(i32 %x) { + %t = udiv i32 %x, 3 + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @moo(i32 %x) { + %t = urem i32 %x, 30000 + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @yoo(i32 %x) { + %u = lshr i32 %x, 3 + %t = mul i32 %u, 3 + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @voo(i32 %x) { + %t = and i32 %x, 511 + %u = sub i32 20000, %t + %s = sext i32 %u to i64 + ret i64 %s +} -- 2.34.1