if (!OpC) return false;
// If there are no bits set that aren't demanded, nothing to do.
- Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
if ((~Demanded & OpC->getValue()) == 0)
return false;
assert(V != 0 && "Null pointer of Value???");
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
- const Type *VTy = V->getType();
+ Type *VTy = V->getType();
assert((TD || !VTy->isPointerTy()) &&
"SimplifyDemandedBits needs to know bit widths!");
assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
return 0; // Only analyze instructions.
}
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
}
// Compute the KnownZero/KnownOne bits to simplify things downstream.
- ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
return 0;
}
switch (I->getOpcode()) {
default:
- ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
- return InsertNewInstBefore(Or, *I);
+ return InsertNewInstWith(Or, *I);
}
// If all of the demanded bits on one side are known, and all of the set
if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
Constant *AndC = Constant::getIntegerValue(VTy,
~RHSKnownOne & DemandedMask);
- Instruction *And =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
- return InsertNewInstBefore(And, *I);
+ Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ return InsertNewInstWith(And, *I);
}
}
Constant *AndC =
ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
- Instruction *NewAnd =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
- InsertNewInstBefore(NewAnd, *I);
+ Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ InsertNewInstWith(NewAnd, *I);
Constant *XorC =
ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
- Instruction *NewXor =
- BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
- return InsertNewInstBefore(NewXor, *I);
+ Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
+ return InsertNewInstWith(NewXor, *I);
}
// Output known-0 bits are known if clear or set in both the LHS & RHS.
break;
case Instruction::Trunc: {
unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask.zext(truncBf);
- KnownZero.zext(truncBf);
- KnownOne.zext(truncBf);
+ DemandedMask = DemandedMask.zext(truncBf);
+ KnownZero = KnownZero.zext(truncBf);
+ KnownOne = KnownOne.zext(truncBf);
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
- DemandedMask.trunc(BitWidth);
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
+ DemandedMask = DemandedMask.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
}
if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
return 0; // vector->int or fp->int?
- if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
- if (const VectorType *SrcVTy =
+ if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (VectorType *SrcVTy =
dyn_cast<VectorType>(I->getOperand(0)->getType())) {
if (DstVTy->getNumElements() != SrcVTy->getNumElements())
// Don't touch a bitcast between vectors of different element counts.
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
+ DemandedMask = DemandedMask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
- DemandedMask.zext(BitWidth);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ DemandedMask = DemandedMask.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// The top bits are known to be zero.
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
if ((NewBits & DemandedMask) != 0)
InputDemandedBits.setBit(SrcBitWidth-1);
- InputDemandedBits.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
+ InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
KnownZero, KnownOne, Depth+1))
return I;
- InputDemandedBits.zext(BitWidth);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ InputDemandedBits = InputDemandedBits.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
// Convert to ZExt cast
CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
- return InsertNewInstBefore(NewCast, *I);
+ return InsertNewInstWith(NewCast, *I);
} else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
KnownOne |= NewBits;
}
Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
- return InsertNewInstBefore(Or, *I);
+ return InsertNewInstWith(Or, *I);
}
// We can say something about the output known-zero and known-one bits,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
}
+
// Otherwise just hand the sub off to ComputeMaskedBits to fill in
// the known zeros and ones.
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
+ if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ APInt I0 = C0->getValue();
+ if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
+ Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
+ return InsertNewInstWith(Xor, *I);
+ }
+ }
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ ShlOperator *IOp = cast<ShlOperator>(I);
+ if (IOp->hasNoSignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (IOp->hasNoUnsignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
case Instruction::LShr:
// For a logical shift right
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Unsigned shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<LShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
// Perform the logical shift right.
Instruction *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
- return InsertNewInstBefore(NewVal, *I);
+ return InsertNewInstWith(NewVal, *I);
}
// If the sign bit is the only bit demanded by this ashr, then there is no
return I->getOperand(0);
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
// demanded.
if (DemandedMask.countLeadingZeros() <= ShiftAmt)
DemandedMaskIn.setBit(BitWidth-1);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<AShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
// Perform the logical shift right.
- Instruction *NewVal = BinaryOperator::CreateLShr(
- I->getOperand(0), SA, I->getName());
- return InsertNewInstBefore(NewVal, *I);
+ BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
+ SA, I->getName());
+ NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
+ return InsertNewInstWith(NewVal, *I);
} else if ((KnownOne & SignBit) != 0) { // New bits are known one.
KnownOne |= HighBits;
}
break;
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // X % -1 demands all the bits because we don't want to introduce
+ // INT_MIN % -1 (== undef) by accident.
+ if (Rem->isAllOnesValue())
+ break;
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
if (DemandedMask.ult(RA)) // srem won't affect demanded bits
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
}
}
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
break;
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
ConstantInt::get(I->getType(), ResultBit-InputBit));
NewVal->takeName(I);
- return InsertNewInstBefore(NewVal, *I);
+ return InsertNewInstWith(NewVal, *I);
}
// TODO: Could compute known zero/one bits based on the input.
break;
}
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ return 0;
}
}
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
break;
}
}
UndefElts = 0;
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
- const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Undef = UndefValue::get(EltTy);
+
+ // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i)
+ Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to undef.
Elts.push_back(Undef);
UndefElts.setBit(i);
- } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
+ continue;
+ }
+
+ Constant *Elt = C->getAggregateElement(i);
+ if (Elt == 0) return 0;
+
+ if (isa<UndefValue>(Elt)) { // Already undef.
Elts.push_back(Undef);
UndefElts.setBit(i);
} else { // Otherwise, defined.
- Elts.push_back(CV->getOperand(i));
+ Elts.push_back(Elt);
}
-
- // If we changed the constant, return it.
- Constant *NewCP = ConstantVector::get(Elts);
- return NewCP != CV ? NewCP : 0;
- }
-
- if (isa<ConstantAggregateZero>(V)) {
- // Simplify the CAZ to a ConstantVector where the non-demanded elements are
- // set to undef.
-
- // Check if this is identity. If so, return 0 since we are not simplifying
- // anything.
- if (DemandedElts.isAllOnesValue())
- return 0;
-
- const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Zero = Constant::getNullValue(EltTy);
- Constant *Undef = UndefValue::get(EltTy);
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elt = DemandedElts[i] ? Zero : Undef;
- Elts.push_back(Elt);
}
- UndefElts = DemandedElts ^ EltMask;
- return ConstantVector::get(Elts);
+
+ // If we changed the constant, return it.
+ Constant *NewCV = ConstantVector::get(Elts);
+ return NewCV != C ? NewCV : 0;
}
// Limit search depth.
if (Depth == 10)
return 0;
- // If multiple users are using the root value, procede with
+ // If multiple users are using the root value, proceed with
// simplification conservatively assuming that all elements
// are needed.
if (!V->hasOneUse()) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
UndefElts.setBit(i);
+ } else if (!DemandedElts[i]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
} else if (MaskVal < LHSVWidth) {
if (UndefElts4[MaskVal]) {
NewUndefElts = true;
if (NewUndefElts) {
// Add additional discovered undefs.
- std::vector<Constant*> Elts;
+ SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
if (UndefElts[i])
Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
}
case Instruction::BitCast: {
// Vector->vector casts only.
- const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
if (!VTy) break;
unsigned InVWidth = VTy->getNumElements();
APInt InputDemandedElts(InVWidth, 0);
Value *LHS = II->getArgOperand(0);
Value *RHS = II->getArgOperand(1);
// Extract the element as scalars.
- LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
+ LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
- RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
+ RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse2_sub_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
II->getName()), *II);
break;
case Intrinsic::x86_sse_mul_ss:
case Intrinsic::x86_sse2_mul_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
II->getName()), *II);
break;
}
UndefValue::get(II->getType()), TmpV,
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
II->getName());
- InsertNewInstBefore(New, *II);
+ InsertNewInstWith(New, *II);
return New;
}
}