/// properties that allow us to simplify its operands.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
- Value *SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
- uint64_t &UndefElts, unsigned Depth = 0);
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts, unsigned Depth = 0);
// FoldOpIntoPhi - Given a binary operator or cast instruction which has a
// PHI node as operand #0, see if we can fold the instruction into the PHI
if (isa<UndefValue>(V))
return 0;
return UndefValue::get(VTy);
- } else if (!V->hasOneUse()) { // Other users may use these bits.
- if (Depth != 0) { // Not at the root.
- // Just compute the KnownZero/KnownOne bits to simplify things downstream.
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
- return 0;
- }
- // If this is the root being simplified, allow it to have multiple uses,
- // just set the DemandedMask to all bits.
- DemandedMask = APInt::getAllOnesValue(BitWidth);
- } else if (Depth == 6) { // Limit search depth.
- return 0;
}
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return 0; // Only analyze instructions.
-
+
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in every user's
+ // context, we can at least compute the KnownZero/KnownOne bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
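+ // For example, if only the low 8 bits are demanded and the RHS is known to
+ // be the constant 255, every demanded bit of the 'and' comes straight from
+ // the LHS, so the LHS can stand in for the whole 'and' in this user.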
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
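+ // For example, if Y is known to be zero in every demanded bit, then
+ // (or X, Y) computes exactly the demanded bits of X and we can return X
+ // directly for this user.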
+
+ // If either the LHS or the RHS are One, the result is One.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
switch (I->getOpcode()) {
default:
ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
/// SimplifyDemandedVectorElts - The specified value produces a vector with
-/// 64 or fewer elements. DemandedElts contains the set of elements that are
+/// any number of elements. DemandedElts contains the set of elements that are
/// actually used by the caller. This method analyzes which elements of the
/// operand are undef and returns that information in UndefElts.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
-Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
- uint64_t &UndefElts,
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts,
unsigned Depth) {
unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
- assert(VWidth <= 64 && "Vector too wide to analyze!");
- uint64_t EltMask = ~0ULL >> (64-VWidth);
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
if (isa<UndefValue>(V)) {
std::vector<Constant*> Elts;
for (unsigned i = 0; i != VWidth; ++i)
- if (!(DemandedElts & (1ULL << i))) { // If not demanded, set to undef.
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
Elts.push_back(Undef);
- UndefElts |= (1ULL << i);
+ UndefElts.set(i);
} else if (isa<UndefValue>(CP->getOperand(i))) { // Already undef.
Elts.push_back(Undef);
- UndefElts |= (1ULL << i);
+ UndefElts.set(i);
} else { // Otherwise, defined.
Elts.push_back(CP->getOperand(i));
}
Constant *Zero = Constant::getNullValue(EltTy);
Constant *Undef = UndefValue::get(EltTy);
std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i)
- Elts.push_back((DemandedElts & (1ULL << i)) ? Zero : Undef);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = DemandedElts[i] ? Zero : Undef;
+ Elts.push_back(Elt);
+ }
UndefElts = DemandedElts ^ EltMask;
return ConstantVector::get(Elts);
}
if (!I) return false; // Only analyze instructions.
bool MadeChange = false;
- uint64_t UndefElts2;
+ APInt UndefElts2(VWidth, 0);
Value *TmpV;
switch (I->getOpcode()) {
default: break;
// If this is inserting an element that isn't demanded, remove this
// insertelement.
unsigned IdxNo = Idx->getZExtValue();
- if (IdxNo >= VWidth || (DemandedElts & (1ULL << IdxNo)) == 0)
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo])
return AddSoonDeadInstToWorklist(*I, 0);
// Otherwise, the element inserted overwrites whatever was there, so the
// input demanded set is simpler than the output set.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0),
- DemandedElts & ~(1ULL << IdxNo),
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clear(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
UndefElts, Depth+1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
- UndefElts &= ~(1ULL << IdxNo);
+ UndefElts.clear(IdxNo);
break;
}
case Instruction::ShuffleVector: {
ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
uint64_t LHSVWidth =
cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
- uint64_t LeftDemanded = 0, RightDemanded = 0;
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
for (unsigned i = 0; i < VWidth; i++) {
- if (DemandedElts & (1ULL << i)) {
+ if (DemandedElts[i]) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal != -1u) {
assert(MaskVal < LHSVWidth * 2 &&
"shufflevector mask index out of range!");
if (MaskVal < LHSVWidth)
- LeftDemanded |= 1ULL << MaskVal;
+ LeftDemanded.set(MaskVal);
else
- RightDemanded |= 1ULL << (MaskVal - LHSVWidth);
+ RightDemanded.set(MaskVal - LHSVWidth);
}
}
}
+ APInt UndefElts4(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts2, Depth+1);
+ UndefElts4, Depth+1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- uint64_t UndefElts3;
+ APInt UndefElts3(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
UndefElts3, Depth+1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
for (unsigned i = 0; i < VWidth; i++) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
- uint64_t NewBit = 1ULL << i;
- UndefElts |= NewBit;
+ UndefElts.set(i);
} else if (MaskVal < LHSVWidth) {
- uint64_t NewBit = ((UndefElts2 >> MaskVal) & 1) << i;
- NewUndefElts |= NewBit;
- UndefElts |= NewBit;
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
} else {
- uint64_t NewBit = ((UndefElts3 >> (MaskVal - LHSVWidth)) & 1) << i;
- NewUndefElts |= NewBit;
- UndefElts |= NewBit;
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
}
}
// Add additional discovered undefs.
std::vector<Constant*> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
- if (UndefElts & (1ULL << i))
+ if (UndefElts[i])
Elts.push_back(UndefValue::get(Type::Int32Ty));
else
Elts.push_back(ConstantInt::get(Type::Int32Ty,
const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
if (!VTy) break;
unsigned InVWidth = VTy->getNumElements();
- uint64_t InputDemandedElts = 0;
+ APInt InputDemandedElts(InVWidth, 0);
unsigned Ratio;
if (VWidth == InVWidth) {
// elements are live.
Ratio = VWidth/InVWidth;
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
- if (DemandedElts & (1ULL << OutIdx))
- InputDemandedElts |= 1ULL << (OutIdx/Ratio);
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.set(OutIdx/Ratio);
}
} else {
// Untested so far.
// live.
Ratio = InVWidth/VWidth;
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if (DemandedElts & (1ULL << InIdx/Ratio))
- InputDemandedElts |= 1ULL << InIdx;
+ if (DemandedElts[InIdx/Ratio])
+ InputDemandedElts.set(InIdx);
}
// div/rem demand all inputs, because they don't want divide by zero.
// then an output element is undef if the corresponding input element is
// undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
- if (UndefElts2 & (1ULL << (OutIdx/Ratio)))
- UndefElts |= 1ULL << OutIdx;
+ if (UndefElts2[OutIdx/Ratio])
+ UndefElts.set(OutIdx);
} else if (VWidth < InVWidth) {
assert(0 && "Unimp");
// If there are more elements in the source than there are in the result,
// elements are undef.
UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if ((UndefElts2 & (1ULL << InIdx)) == 0) // Not undef?
- UndefElts &= ~(1ULL << (InIdx/Ratio)); // Clear undef bit.
+ if (!UndefElts2[InIdx]) // Not undef?
+ UndefElts.clear(InIdx/Ratio); // Clear undef bit.
}
break;
}
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
- Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1) &&
- I.isEquality()) {
+ Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
switch (Op0I->getOpcode()) {
default: break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Xor:
- // a+x icmp eq/ne b+x --> a icmp b
- return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
- Op1I->getOperand(0));
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
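+ // e.g. for i8: icmp ult (x ^ 0x80), (y ^ 0x80) --> icmp slt x, y, because
+ // xor'ing both operands with the sign bit maps signed order onto unsigned
+ // order.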
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSignedPredicate()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+
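+ // Likewise, xor'ing both operands with ~signbit (the maximum signed
+ // value) reverses the order, so the predicate is swapped in addition to
+ // being flipped between signed and unsigned, e.g. for i8:
+ //   icmp ult (x ^ 0x7f), (y ^ 0x7f) --> icmp sgt x, y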
+ if (CI->getValue().isMaxSignedValue()) {
+ ICmpInst::Predicate Pred = I.isSignedPredicate()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+ }
break;
case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
// a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
// Mask = -1 >> count-trailing-zeros(Cst).
else
return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, AddOne(RHS));
}
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
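+ // e.g. for i8: icmp ult (x ^ 0x80), 10 --> icmp slt x, -118 (10 ^ 0x80).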
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
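+ // e.g. for i8: icmp ult (x ^ 0x7f), 10 --> icmp sgt x, 117 (10 ^ 0x7f).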
+ if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+ const APInt &NotSignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(RHSV ^ NotSignBit));
+ }
+ }
}
break;
case Instruction::And: // (icmp pred (and X, AndCST), RHS)
MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
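+ // e.g. for i32, ashr'ing the result of (ashr X, 31): every bit is already
+ // a copy of the sign bit (the value is 0 or -1), so shifting it again
+ // cannot change it and we can just return the operand.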
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+
return 0;
}
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
BinaryOperator &I) {
- bool isLeftShift = I.getOpcode() == Instruction::Shl;
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
break;
case Instruction::ZExt: {
DoXForm = NumCastsRemoved >= 1;
- if (!DoXForm) {
+ if (!DoXForm && 0) {
// If it's unnecessary to issue an AND to clear the high bits, it's
// always profitable to do this xform.
- Value *TryRes = EvaluateInDifferentType(SrcI, DestTy,
- CI.getOpcode() == Instruction::SExt);
+ Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false);
APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
if (MaskedValueIsZero(TryRes, Mask))
return ReplaceInstUsesWith(CI, TryRes);
- else if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
+
+ if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
if (TryI->use_empty())
EraseInstFromFunction(*TryI);
}
}
case Instruction::SExt: {
DoXForm = NumCastsRemoved >= 2;
- if (!DoXForm && !isa<TruncInst>(SrcI)) {
+ if (!DoXForm && !isa<TruncInst>(SrcI) && 0) {
// If we do not have to emit the truncate + sext pair, then it's always
// profitable to do this xform.
//
// t3 = sext i16 t2 to i32
// !=
// i32 t1
- Value *TryRes = EvaluateInDifferentType(SrcI, DestTy,
- CI.getOpcode() == Instruction::SExt);
+ Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true);
unsigned NumSignBits = ComputeNumSignBits(TryRes);
if (NumSignBits > (DestBitSize - SrcBitSize))
return ReplaceInstUsesWith(CI, TryRes);
- else if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
+
+ if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
if (TryI->use_empty())
EraseInstFromFunction(*TryI);
}
}
if (DoXForm) {
+ DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid"
+ << " cast: " << CI;
Value *Res = EvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode() == Instruction::SExt);
if (JustReplace)
- // Just replace this cast with the result.
- return ReplaceInstUsesWith(CI, Res);
+ // Just replace this cast with the result.
+ return ReplaceInstUsesWith(CI, Res);
assert(Res->getType() == DestTy);
switch (CI.getOpcode()) {
Value *Src = CI.getOperand(0);
- // If this is a cast of a cast
- if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- // If this is a TRUNC followed by a ZEXT then we are dealing with integral
- // types and if the sizes are just right we can convert this into a logical
- // 'and' which will be much cheaper than the pair of casts.
- if (isa<TruncInst>(CSrc)) {
- // Get the sizes of the types involved
- Value *A = CSrc->getOperand(0);
- uint32_t SrcSize = A->getType()->getPrimitiveSizeInBits();
- uint32_t MidSize = CSrc->getType()->getPrimitiveSizeInBits();
- uint32_t DstSize = CI.getType()->getPrimitiveSizeInBits();
- // If we're actually extending zero bits and the trunc is a no-op
- if (MidSize < DstSize && SrcSize == DstSize) {
- // Replace both of the casts with an And of the type mask.
- APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(AndValue);
- Instruction *And =
- BinaryOperator::CreateAnd(CSrc->getOperand(0), AndConst);
- // Unfortunately, if the type changed, we need to cast it back.
- if (And->getType() != CI.getType()) {
- And->setName(CSrc->getName()+".mask");
- InsertNewInstBefore(And, CI);
- And = CastInst::CreateIntegerCast(And, CI.getType(), false/*ZExt*/);
- }
- return And;
- }
+ // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+ // types and if the sizes are just right we can convert this into a logical
+ // 'and' which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than both A and C, but don't know the relation between A
+ // and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
+ unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ // The trunc+zext pair reduces to masking off all but the low MidSize bits
+ // of A; the exact form depends on the size relation:
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
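+ // For example, with A of type i16, the intermediate type i8 and the
+ // destination type i32:
+ //   zext (trunc i16 %a to i8) to i32 --> zext (and i16 %a, 255) to i32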
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(AndValue);
+ Instruction *And =
+ BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ InsertNewInstBefore(And, CI);
+ return new ZExtInst(And, CI.getType());
+ } else if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+ } else if (SrcSize > DstSize) {
+ Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
+ InsertNewInstBefore(Trunc, CI);
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
}
}
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (!GV->isDeclaration()) {
- GV->setAlignment(PrefAlign);
- Align = PrefAlign;
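+ // Only ever raise the alignment of the global; if it is already aligned
+ // at least as strictly as PrefAlign, keep its alignment and report that.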
+ if (GV->getAlignment() >= PrefAlign)
+ Align = GV->getAlignment();
+ else {
+ GV->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
}
} else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
// If there is a requested alignment and if this is an alloca, round up. We
// don't do this for malloc, because some systems can't respect the request.
if (isa<AllocaInst>(AI)) {
- AI->setAlignment(PrefAlign);
- Align = PrefAlign;
+ if (AI->getAlignment() >= PrefAlign)
+ Align = AI->getAlignment();
+ else {
+ AI->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
}
}
case Intrinsic::x86_sse_cvttss2si: {
// These intrinsics only demand the 0th element of their input vector. If
// we can simplify the input based on that, do so now.
- uint64_t UndefElts;
- if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1,
+ unsigned VWidth =
+ cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
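+ // An APInt with just the low bit set marks element 0 as the only
+ // demanded element.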
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
UndefElts)) {
II->setOperand(1, V);
return II;
SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
// Scan to see if all operands are the same opcode, all have one use, and all
// kill their operands (i.e. the operands have one use).
GEP->getNumOperands() != FirstInst->getNumOperands())
return 0;
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
// Compare the operand lists.
for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
if (FirstInst->getOperand(op) == GEP->getOperand(op))
}
}
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
}
-/// isSafeToSinkLoad - Return true if we know that it is safe sink the load out
-/// of the block that defines it. This means that it must be obvious the value
-/// of the load is not changed from the point of the load to the end of the
-/// block it is in.
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it. This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
///
/// Finally, it is safe, but not profitable, to sink a load targeting a
/// non-address-taken alloca. Doing so will cause us to not promote the alloca
/// to a register.
-static bool isSafeToSinkLoad(LoadInst *L) {
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L, E = L->getParent()->end();
for (++BBI; BBI != E; ++BBI)
break;
}
- if (!isAddressTaken)
+ if (!isAddressTaken && AI->isStaticAlloca())
return false;
}
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
return true;
}
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
if (LI->getParent() != PN.getIncomingBlock(0) ||
- !isSafeToSinkLoad(LI))
+ !isSafeAndProfitableToSinkLoad(LI))
return 0;
// If the PHI is of volatile loads and the load block has multiple
// the load and the PHI.
if (LI->isVolatile() != isVolatile ||
LI->getParent() != PN.getIncomingBlock(i) ||
- !isSafeToSinkLoad(LI))
+ !isSafeAndProfitableToSinkLoad(LI))
return 0;
// If the PHI is of volatile loads and the load block has multiple
if (isVolatile &&
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
-
} else if (I->getOperand(1) != ConstantOp) {
return 0;
APInt SingleChar(numBits, 0);
if (TD->isLittleEndian()) {
for (signed i = len-1; i >= 0; i--) {
- SingleChar = (uint64_t) Str[i];
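+ // Mask with UCHAR_MAX so a negative (signed) char isn't sign-extended
+ // into the upper bits of SingleChar by the cast to uint64_t.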
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
StrVal = (StrVal << 8) | SingleChar;
}
} else {
for (unsigned i = 0; i < len; i++) {
- SingleChar = (uint64_t) Str[i];
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
StrVal = (StrVal << 8) | SingleChar;
}
// Append NULL at the end.
}
}
- const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ const PointerType *DestTy = cast<PointerType>(CI->getType());
+ const Type *DestPTy = DestTy->getElementType();
if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
const Type *SrcPTy = SrcTy->getElementType();
if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
Value *Op = LI.getOperand(0);
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Op);
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
if (KnownAlign >
(LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
LI.getAlignment()))
}
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr);
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
if (KnownAlign >
(SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
SI.getAlignment()))
// If the input vector has a single use, simplify it based on this use
// property.
if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
- uint64_t UndefElts;
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 0);
+ DemandedMask.set(IndexVal);
if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- 1 << IndexVal,
- UndefElts)) {
+ DemandedMask, UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
if (isa<UndefValue>(SVI.getOperand(2)))
return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
- uint64_t UndefElts;
unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
return 0;
- uint64_t AllOnesEltMask = ~0ULL >> (64-VWidth);
- if (VWidth <= 64 &&
- SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
LHS = SVI.getOperand(0);
RHS = SVI.getOperand(1);
MadeChange = true;
DBI_Prev->eraseFromParent();
}
DBI_Prev = DBI_Next;
+ } else {
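+ // A non-debug instruction was seen in between, so the earlier debug info
+ // intrinsic is still meaningful; forget it instead of deleting it on the
+ // next iteration.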
+ DBI_Prev = 0;
}
IC.AddToWorkList(Inst);