X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAnalysis%2FValueTracking.cpp;h=6f58afe527906428fddb9659e6b40eadac6789b8;hb=124708d9b47cc53cb11d8bfed75b241b6eec86d4;hp=f9331e76d0d07cf8b6ca57ac68798223d124761e;hpb=cfd54181a44db5ac75cd4a7d0a3c6a199ab01c29;p=oota-llvm.git

diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index f9331e76d0d..6f58afe5279 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/GlobalVariable.h"
@@ -23,6 +24,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include <cstring>
 using namespace llvm;
@@ -49,11 +51,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
   unsigned BitWidth = Mask.getBitWidth();
-  assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) &&
-         "Not integer or pointer type!");
+  assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
+         && "Not integer or pointer type!");
   assert((!TD ||
           TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
-         (!V->getType()->isIntOrIntVector() ||
+         (!V->getType()->isIntOrIntVectorTy() ||
           V->getType()->getScalarSizeInBits() == BitWidth) &&
          KnownZero.getBitWidth() == BitWidth &&
          KnownOne.getBitWidth() == BitWidth &&
@@ -68,14 +70,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
 
   // Null and aggregate-zero are all-zeros.
   if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     KnownZero = Mask;
     return;
   }
   // Handle a constant vector by taking the intersection of the known bits of
   // each element.
   if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
-    KnownZero.set(); KnownOne.set();
+    KnownZero.setAllBits(); KnownOne.setAllBits();
     for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
       APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
       ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
@@ -102,15 +104,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
       KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
                                               CountTrailingZeros_32(Align));
     else
-      KnownZero.clear();
-    KnownOne.clear();
+      KnownZero.clearAllBits();
+    KnownOne.clearAllBits();
     return;
   }
   // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
   // the bits of its aliasee.
   if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
     if (GA->mayBeOverridden()) {
-      KnownZero.clear(); KnownOne.clear();
+      KnownZero.clearAllBits(); KnownOne.clearAllBits();
     } else {
       ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
                         TD, Depth+1);
@@ -118,7 +120,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     return;
   }
 
-  KnownZero.clear(); KnownOne.clear();   // Start out not knowing anything.
+  KnownZero.clearAllBits(); KnownOne.clearAllBits();   // Start out not knowing anything.
 
   if (Depth == MaxDepth || Mask == 0)
     return;  // Limit search depth.
@@ -184,7 +186,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     // Also compute a conserative estimate for high known-0 bits.
     // More trickiness is possible, but this is sufficient for the
    // interesting case of alignment computation.
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     unsigned TrailZ = KnownZero.countTrailingOnes() +
                       KnownZero2.countTrailingOnes();
     unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
@@ -207,8 +209,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
                       AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
     unsigned LeadZ = KnownZero2.countLeadingOnes();
 
-    KnownOne2.clear();
-    KnownZero2.clear();
+    KnownOne2.clearAllBits();
+    KnownZero2.clearAllBits();
     ComputeMaskedBits(I->getOperand(1),
                       AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -249,19 +251,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     unsigned SrcBitWidth;
     // Note that we handle pointer operands here because of inttoptr/ptrtoint
     // which fall through here.
-    if (isa<PointerType>(SrcTy))
+    if (SrcTy->isPointerTy())
       SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
     else
       SrcBitWidth = SrcTy->getScalarSizeInBits();
 
-    APInt MaskIn(Mask);
-    MaskIn.zextOrTrunc(SrcBitWidth);
-    KnownZero.zextOrTrunc(SrcBitWidth);
-    KnownOne.zextOrTrunc(SrcBitWidth);
+    APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
+    KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+    KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
     ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
                       Depth+1);
-    KnownZero.zextOrTrunc(BitWidth);
-    KnownOne.zextOrTrunc(BitWidth);
+    KnownZero = KnownZero.zextOrTrunc(BitWidth);
+    KnownOne = KnownOne.zextOrTrunc(BitWidth);
     // Any top bits are known to be zero.
     if (BitWidth > SrcBitWidth)
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
@@ -269,10 +270,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   }
   case Instruction::BitCast: {
     const Type *SrcTy = I->getOperand(0)->getType();
-    if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         // TODO: For now, not handling conversions like:
         // (bitcast i64 %x to <2 x i32>)
-        !isa<VectorType>(I->getType())) {
+        !I->getType()->isVectorTy()) {
       ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
                         Depth+1);
       return;
@@ -283,15 +284,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     // Compute the bits in the result that are not present in the input.
     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
 
-    APInt MaskIn(Mask);
-    MaskIn.trunc(SrcBitWidth);
-    KnownZero.trunc(SrcBitWidth);
-    KnownOne.trunc(SrcBitWidth);
+    APInt MaskIn = Mask.trunc(SrcBitWidth);
+    KnownZero = KnownZero.trunc(SrcBitWidth);
+    KnownOne = KnownOne.trunc(SrcBitWidth);
     ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
                       Depth+1);
     assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
     // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
@@ -473,7 +473,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
                                 KnownZero2.countLeadingOnes());
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
     break;
   }
@@ -649,7 +649,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
 ///
 unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
                                   unsigned Depth) {
-  assert((TD || V->getType()->isIntOrIntVector()) &&
+  assert((TD || V->getType()->isIntOrIntVectorTy()) &&
          "ComputeNumSignBits requires a TargetData object to operate "
          "on non-integer values!");
   const Type *Ty = V->getType();
@@ -678,6 +678,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
       Tmp += C->getZExtValue();
       if (Tmp > TyBits) Tmp = TyBits;
     }
+    // vector ashr X, <C, C, C, C>  -> adds C sign bits
+    if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
+      if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+        Tmp += CI->getZExtValue();
+        if (Tmp > TyBits) Tmp = TyBits;
+      }
+    }
     return Tmp;
   case Instruction::Shl:
     if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
@@ -778,7 +785,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
     for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
       if (Tmp == 1) return Tmp;
       Tmp = std::min(Tmp,
-                     ComputeNumSignBits(PN->getIncomingValue(1), TD, Depth+1));
+                     ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
     }
     return Tmp;
   }
@@ -823,7 +830,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
-  assert(V->getType()->isInteger() && "Not integer or pointer type!");
+  assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
 
   const Type *T = V->getType();
@@ -874,24 +881,26 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
       // Turn Op0 << Op1 into Op0 * 2^Op1
       APInt Op1Int = Op1CI->getValue();
       uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
-      Op1 = ConstantInt::get(V->getContext(),
-                             APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
+      APInt API(Op1Int.getBitWidth(), 0);
+      API.setBit(BitToSet);
+      Op1 = ConstantInt::get(V->getContext(), API);
     }
 
     Value *Mul0 = NULL;
-    Value *Mul1 = NULL;
-    bool M0 = ComputeMultiple(Op0, Base, Mul0,
-                              LookThroughSExt, Depth+1);
-    bool M1 = ComputeMultiple(Op1, Base, Mul1,
-                              LookThroughSExt, Depth+1);
-
-    if (M0) {
-      if (isa<Constant>(Op1) && isa<Constant>(Mul0)) {
-        // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
-        Multiple = ConstantExpr::getMul(cast<Constant>(Mul0),
-                                        cast<Constant>(Op1));
-        return true;
-      }
+    if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+      if (Constant *Op1C = dyn_cast<Constant>(Op1))
+        if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+          if (Op1C->getType()->getPrimitiveSizeInBits() <
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+          if (Op1C->getType()->getPrimitiveSizeInBits() >
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+          // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+          Multiple = ConstantExpr::getMul(MulC, Op1C);
+          return true;
+        }
 
       if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
         if (Mul0CI->getValue() == 1) {
@@ -901,13 +910,21 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
       }
-    if (M1) {
-      if (isa<Constant>(Op0) && isa<Constant>(Mul1)) {
-        // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
-        Multiple = ConstantExpr::getMul(cast<Constant>(Mul1),
-                                        cast<Constant>(Op0));
-        return true;
-      }
+    Value *Mul1 = NULL;
+    if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+      if (Constant *Op0C = dyn_cast<Constant>(Op0))
+        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+          if (Op0C->getType()->getPrimitiveSizeInBits() <
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+          if (Op0C->getType()->getPrimitiveSizeInBits() >
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+          // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+          Multiple = ConstantExpr::getMul(MulC, Op0C);
+          return true;
+        }
 
       if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
         if (Mul1CI->getValue() == 1) {
@@ -952,7 +969,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
     // sqrt(-0.0) = -0.0, no other negative results are possible.
     if (II->getIntrinsicID() == Intrinsic::sqrt)
-      return CannotBeNegativeZero(II->getOperand(1), Depth+1);
+      return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
 
   if (const CallInst *CI = dyn_cast<CallInst>(I))
     if (const Function *F = CI->getCalledFunction()) {
@@ -965,199 +982,79 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
       if (F->getName() == "fabsl") return true;
       if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
           F->getName() == "sqrtl")
-        return CannotBeNegativeZero(CI->getOperand(1), Depth+1);
+        return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
     }
   }
 
   return false;
 }
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers.  Return the scale and offset
-/// values as APInts and return V as a Value*.  The incoming Value is known to
-/// have IntegerType.  Note that this looks through extends, so the high bits
-/// may not be represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
-                                  const TargetData *TD, unsigned Depth) {
-  assert(isa<IntegerType>(V->getType()) && "Not an integer value");
-
-  // Limit our recursion depth.
-  if (Depth == 6) {
-    Scale = 1;
-    Offset = 0;
-    return V;
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with.  This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+  // All byte-wide stores are splatable, even of arbitrary variables.
+  if (V->getType()->isIntegerTy(8)) return V;
+
+  // Constant float and double values can be handled as integer values if the
+  // corresponding integer value is "byteable".  An important case is 0.0.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    if (CFP->getType()->isFloatTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+    if (CFP->getType()->isDoubleTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+    // Don't handle long double formats, which have strange constraints.
   }
-
-  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
-    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
-      switch (BOp->getOpcode()) {
-      default: break;
-      case Instruction::Or:
-        // X|C == X+C if all the bits in C are unset in X.  Otherwise we can't
-        // analyze it.
-        if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD))
-          break;
-        // FALL THROUGH.
-      case Instruction::Add:
-        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
-        Offset += RHSC->getValue();
-        return V;
-      case Instruction::Mul:
-        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
-        Offset *= RHSC->getValue();
-        Scale *= RHSC->getValue();
-        return V;
-      case Instruction::Shl:
-        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
-        Offset <<= RHSC->getValue().getLimitedValue();
-        Scale <<= RHSC->getValue().getLimitedValue();
-        return V;
+  // We can handle constant integers that are power of two in size and a
+  // multiple of 8 bits.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    unsigned Width = CI->getBitWidth();
+    if (isPowerOf2_32(Width) && Width > 8) {
+      // We can handle this value if the recursive binary decomposition is the
+      // same at all levels.
+      APInt Val = CI->getValue();
+      APInt Val2;
+      while (Val.getBitWidth() != 8) {
+        unsigned NextWidth = Val.getBitWidth()/2;
+        Val2  = Val.lshr(NextWidth);
+        Val2 = Val2.trunc(Val.getBitWidth()/2);
+        Val = Val.trunc(Val.getBitWidth()/2);
+
+        // If the top/bottom halves aren't the same, reject it.
+        if (Val != Val2)
+          return 0;
       }
+      return ConstantInt::get(V->getContext(), Val);
     }
   }
-  // Since clients don't care about the high bits of the value, just scales and
-  // offsets, we can look through extensions.
-  if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
-    Value *CastOp = cast<CastInst>(V)->getOperand(0);
-    unsigned OldWidth = Scale.getBitWidth();
-    unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
-    Scale.trunc(SmallWidth);
-    Offset.trunc(SmallWidth);
-    Value *Result = GetLinearExpression(CastOp, Scale, Offset, TD, Depth+1);
-    Scale.zext(OldWidth);
-    Offset.zext(OldWidth);
-    return Result;
-  }
-
-  Scale = 1;
-  Offset = 0;
-  return V;
-}
-
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
-///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits.  As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
-///
-/// When TargetData is around, this function is capable of analyzing everything
-/// that Value::getUnderlyingObject() can look through.  When not, it just looks
-/// through pointer casts.
-///
-const Value *llvm::DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
-                 SmallVectorImpl<std::pair<const Value*, int64_t> > &VarIndices,
-                 const TargetData *TD) {
-  // Limit recursion depth to limit compile time in crazy cases.
-  unsigned MaxLookup = 6;
-
-  BaseOffs = 0;
-  do {
-    // See if this is a bitcast or GEP.
-    const Operator *Op = dyn_cast<Operator>(V);
-    if (Op == 0) {
-      // The only non-operator case we can handle are GlobalAliases.
-      if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-        if (!GA->mayBeOverridden()) {
-          V = GA->getAliasee();
-          continue;
-        }
-      }
-      return V;
-    }
+  // A ConstantArray is splatable if all its members are equal and also
+  // splatable.
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+    if (CA->getNumOperands() == 0)
+      return 0;
-    if (Op->getOpcode() == Instruction::BitCast) {
-      V = Op->getOperand(0);
-      continue;
-    }
+    Value *Val = isBytewiseValue(CA->getOperand(0));
+    if (!Val)
+      return 0;
-    const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
-    if (GEPOp == 0)
-      return V;
+    for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
+      if (CA->getOperand(I-1) != CA->getOperand(I))
+        return 0;
-    // Don't attempt to analyze GEPs over unsized objects.
-    if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
-        ->getElementType()->isSized())
-      return V;
-
-    // If we are lacking TargetData information, we can't compute the offets of
-    // elements computed by GEPs.  However, we can handle bitcast equivalent
-    // GEPs.
-    if (!TD) {
-      if (!GEPOp->hasAllZeroIndices())
-        return V;
-      V = GEPOp->getOperand(0);
-      continue;
-    }
-
-    // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
-    gep_type_iterator GTI = gep_type_begin(GEPOp);
-    for (User::const_op_iterator I = GEPOp->op_begin()+1,
-         E = GEPOp->op_end(); I != E; ++I) {
-      Value *Index = *I;
-      // Compute the (potentially symbolic) offset in bytes for this index.
-      if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
-        // For a struct, add the member offset.
-        unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
-        if (FieldNo == 0) continue;
-
-        BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
-        continue;
-      }
-
-      // For an array/pointer, add the element offset, explicitly scaled.
-      if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
-        if (CIdx->isZero()) continue;
-        BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
-        continue;
-      }
-
-      uint64_t Scale = TD->getTypeAllocSize(*GTI);
-
-      // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
-      unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
-      APInt IndexScale(Width, 0), IndexOffset(Width, 0);
-      Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD, 0);
-
-      // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
-      // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      BaseOffs += IndexOffset.getZExtValue()*Scale;
-      Scale *= IndexScale.getZExtValue();
-
-
-      // If we already had an occurrance of this index variable, merge this
-      // scale into it.  For example, we want to handle:
-      //   A[x][x] -> x*16 + x*4 -> x*20
-      // This also ensures that 'x' only appears in the index list once.
-      for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
-        if (VarIndices[i].first == Index) {
-          Scale += VarIndices[i].second;
-          VarIndices.erase(VarIndices.begin()+i);
-          break;
-        }
-      }
-
-      // Make sure that we have a scale that makes sense for this target's
-      // pointer size.
-      if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
-        Scale <<= ShiftBits;
-        Scale >>= ShiftBits;
-      }
-
-      if (Scale)
-        VarIndices.push_back(std::make_pair(Index, Scale));
-    }
-
-    // Analyze the base pointer next.
-    V = GEPOp->getOperand(0);
-  } while (--MaxLookup);
+    return Val;
+  }
-  // If the chain of expressions is too deep, just return early.
-  return V;
+  // Conceptually, we could handle things like:
+  //   %a = zext i8 %X to i16
+  //   %b = shl i16 %a, 8
+  //   %c = or i16 %a, %b
+  // but until there is an example that actually needs this, it doesn't seem
+  // worth worrying about.
+  return 0;
 }
@@ -1253,7 +1150,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
   if (idx_begin == idx_end)
     return V;
   // We have indices, so V should have an indexable type
-  assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType()))
+  assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
          && "Not looking at a struct or array?");
   assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
          && "Invalid indices for type?");
@@ -1338,25 +1235,67 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
   return 0;
 }
 
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset.  Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                              const TargetData &TD) {
+  Operator *PtrOp = dyn_cast<Operator>(Ptr);
+  if (PtrOp == 0) return Ptr;
+
+  // Just look through bitcasts.
+  if (PtrOp->getOpcode() == Instruction::BitCast)
+    return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+  // If this is a GEP with constant indices, we can look through it.
+  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+       ++I, ++GTI) {
+    ConstantInt *OpC = cast<ConstantInt>(*I);
+    if (OpC->isZero()) continue;
+
+    // Handle a struct and array indices which add their offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+    } else {
+      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+      Offset += OpC->getSExtValue()*Size;
+    }
+  }
+
+  // Re-sign extend from the pointer size if needed to get overflow edge cases
+  // right.
+  unsigned PtrSize = TD.getPointerSizeInBits();
+  if (PtrSize < 64)
+    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+  return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
 /// GetConstantStringInfo - This function computes the length of a
 /// null-terminated C string pointed to by V.  If successful, it returns true
 /// and returns the string in Str.  If unsuccessful, it returns false.
-bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+                                 uint64_t Offset,
                                  bool StopAtNul) {
   // If V is NULL then return false;
   if (V == NULL) return false;
 
   // Look through bitcast instructions.
-  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+  if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
     return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
 
   // If the value is not a GEP instruction nor a constant expression with a
   // GEP instruction, then return false because ConstantArray can't occur
   // any other way
-  User *GEP = 0;
-  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+  const User *GEP = 0;
+  if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
     GEP = GEPI;
-  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     if (CE->getOpcode() == Instruction::BitCast)
       return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
     if (CE->getOpcode() != Instruction::GetElementPtr)
@@ -1372,12 +1311,12 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
  // Make sure the index-ee is a pointer to array of i8.
   const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
   const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
-  if (AT == 0 || !AT->getElementType()->isInteger(8))
+  if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
     return false;
 
   // Check to make sure that the first operand of the GEP is an integer and
   // has value 0 so that we are sure we're indexing into the initializer.
-  ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
   if (FirstIdx == 0 || !FirstIdx->isZero())
     return false;
@@ -1385,7 +1324,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
   // into the array.  If this occurs, we can't say anything meaningful about
   // the string.
   uint64_t StartIdx = 0;
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
     StartIdx = CI->getZExtValue();
   else
     return false;
@@ -1396,10 +1335,10 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
   // The GEP instruction, constant or instruction, must reference a global
   // variable that is a constant and is initialized. The referenced constant
   // initializer is the array that we'll use for optimization.
-  GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+  const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
     return false;
-  Constant *GlobalInit = GV->getInitializer();
+  const Constant *GlobalInit = GV->getInitializer();
 
   // Handle the ConstantAggregateZero case
   if (isa<ConstantAggregateZero>(GlobalInit)) {
@@ -1410,8 +1349,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
   }
 
   // Must be a Constant Array
-  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (Array == 0 || !Array->getType()->getElementType()->isInteger(8))
+  const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
     return false;
 
   // Get the number of elements in the array
@@ -1424,8 +1363,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
   // to in the array.
   Str.reserve(NumElts-Offset);
   for (unsigned i = Offset; i != NumElts; ++i) {
-    Constant *Elt = Array->getOperand(i);
-    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    const Constant *Elt = Array->getOperand(i);
+    const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
     if (!CI) // This array isn't suitable, non-int initializer.
       return false;
     if (StopAtNul && CI->isZero())
@@ -1436,3 +1375,159 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
   // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
   return true;
 }
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'.  If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+  // Look through noop bitcast instructions.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+    return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+  // If this is a PHI node, there are two cases: either we have already seen it
+  // or we haven't.
+  if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    if (!PHIs.insert(PN))
+      return ~0ULL;  // already in the set.
+
+    // If it was new, see if all the input strings are the same length.
+    uint64_t LenSoFar = ~0ULL;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+      if (Len == 0) return 0; // Unknown length -> unknown.
+
+      if (Len == ~0ULL) continue;
+
+      if (Len != LenSoFar && LenSoFar != ~0ULL)
+        return 0;    // Disagree -> unknown.
+      LenSoFar = Len;
+    }
+
+    // Success, all agree.
+    return LenSoFar;
+  }
+
+  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+    if (Len1 == 0) return 0;
+    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+    if (Len2 == 0) return 0;
+    if (Len1 == ~0ULL) return Len2;
+    if (Len2 == ~0ULL) return Len1;
+    if (Len1 != Len2) return 0;
+    return Len1;
+  }
+
+  // If the value is not a GEP instruction nor a constant expression with a
+  // GEP instruction, then return unknown.
+  User *GEP = 0;
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+    GEP = GEPI;
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() != Instruction::GetElementPtr)
+      return 0;
+    GEP = CE;
+  } else {
+    return 0;
+  }
+
+  // Make sure the GEP has exactly three arguments.
+  if (GEP->getNumOperands() != 3)
+    return 0;
+
+  // Check to make sure that the first operand of the GEP is an integer and
+  // has value 0 so that we are sure we're indexing into the initializer.
+  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+    if (!Idx->isZero())
+      return 0;
+  } else
+    return 0;
+
+  // If the second index isn't a ConstantInt, then this is a variable index
+  // into the array.  If this occurs, we can't say anything meaningful about
+  // the string.
+  uint64_t StartIdx = 0;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+    StartIdx = CI->getZExtValue();
+  else
+    return 0;
+
+  // The GEP instruction, constant or instruction, must reference a global
+  // variable that is a constant and is initialized. The referenced constant
+  // initializer is the array that we'll use for optimization.
+  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+      GV->mayBeOverridden())
+    return 0;
+  Constant *GlobalInit = GV->getInitializer();
+
+  // Handle the ConstantAggregateZero case, which is a degenerate case. The
+  // initializer is constant zero so the length of the string must be zero.
+  if (isa<ConstantAggregateZero>(GlobalInit))
+    return 1;  // Len = 0 offset by 1.
+
+  // Must be a Constant Array
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+    return false;
+
+  // Get the number of elements in the array
+  uint64_t NumElts = Array->getType()->getNumElements();
+
+  // Traverse the constant array from StartIdx (derived above) which is
+  // the place the GEP refers to in the array.
+  for (unsigned i = StartIdx; i != NumElts; ++i) {
+    Constant *Elt = Array->getOperand(i);
+    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    if (!CI) // This array isn't suitable, non-int initializer.
+      return 0;
+    if (CI->isZero())
+      return i-StartIdx+1; // We found end of string, success!
+  }
+
+  return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'.  If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+  if (!V->getType()->isPointerTy()) return 0;
+
+  SmallPtrSet<PHINode*, 32> PHIs;
+  uint64_t Len = GetStringLengthH(V, PHIs);
+  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+  // an empty string as a length.
+  return Len == ~0ULL ? 1 : Len;
+}
+
+Value *llvm::GetUnderlyingObject(Value *V, unsigned MaxLookup) {
+  if (!V->getType()->isPointerTy())
+    return V;
+  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        return V;
+      V = GA->getAliasee();
+    } else {
+      // See if InstructionSimplify knows any relevant tricks.
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        // TODO: Aquire TargetData and DominatorTree and use them.
+        if (Value *Simplified = SimplifyInstruction(I, 0, 0)) {
+          V = Simplified;
+          continue;
+        }
+
+      return V;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  }
+  return V;
+}
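The two value-splat/string helpers this patch adds or moves into ValueTracking (isBytewiseValue, GetStringLength) are easiest to understand from the caller's side. The following is a minimal illustrative sketch, not part of the patch; the wrapper names (canFormMemset, constantStrlen) and the surrounding pass are hypothetical, while the ValueTracking calls are the ones introduced above. It assumes an LLVM tree of roughly this revision.

// Sketch only: hypothetical callers of the helpers added by this patch.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Instructions.h"
using namespace llvm;

// A store can be turned into a memset only if the stored value is a splat of
// one byte (i8 X, i32 0, i16 0xF0F0, double 0.0, ...); isBytewiseValue
// returns that i8 value, or null if no single repeated byte works.
static bool canFormMemset(StoreInst *SI) {
  return isBytewiseValue(SI->getOperand(0)) != 0;
}

// GetStringLength returns len+1 when the pointed-to constant C string length
// is known (so a "hello" global yields 6) and 0 when it is unknown.
static uint64_t constantStrlen(Value *Ptr) {
  return GetStringLength(Ptr);
}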
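Similarly, a caller-side sketch of GetPointerBaseWithConstantOffset, the other helper this patch introduces. Again this is illustrative only: the function accessesSameBase and its Distance out-parameter are hypothetical names, and a valid TargetData for the module is assumed.

// Sketch only: decompose two pointers into base + constant byte offset and
// compare them, using the helper added above.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

static bool accessesSameBase(Value *A, Value *B, const TargetData &TD,
                             int64_t &Distance) {
  int64_t OffA = 0, OffB = 0;
  Value *BaseA = GetPointerBaseWithConstantOffset(A, OffA, TD);
  Value *BaseB = GetPointerBaseWithConstantOffset(B, OffB, TD);
  if (BaseA != BaseB)
    return false;           // different (or unprovably equal) base objects
  Distance = OffB - OffA;   // byte distance between the two accesses
  return true;
}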