X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FInstCombine%2FInstructionCombining.cpp;h=bc9a43fa7ecbe2f4831b7cef6ffca9047157ed7e;hb=96363d50018da6e9cfa549695f01aa0d4d2d6a31;hp=7f8c3ae55812914d8b4bfc8e177820b9b84eedba;hpb=f1ec4e4123a8e6d811c072f383399781cfeaa9aa;p=oota-llvm.git diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 7f8c3ae5581..bc9a43fa7ec 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -33,30 +33,32 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Scalar.h" #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "llvm-c/Initialization.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm-c/Initialization.h" #include #include using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); @@ -65,6 +67,11 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); +static cl::opt UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); + // Initialization Routines void llvm::initializeInstCombine(PassRegistry &Registry) { initializeInstCombinerPass(Registry); @@ -97,13 +104,13 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); - // If we don't have TD, we don't know if the source/dest are legal. - if (!TD) return false; + // If we don't have DL, we don't know if the source/dest are legal. + if (!DL) return false; unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); - bool FromLegal = TD->isLegalInteger(FromWidth); - bool ToLegal = TD->isLegalInteger(ToWidth); + bool FromLegal = DL->isLegalInteger(FromWidth); + bool ToLegal = DL->isLegalInteger(ToWidth); // If this is a legal integer from type, and the result would be an illegal // type, don't do the transformation. @@ -156,6 +163,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { return !Overflow; } +/// Conservatively clears subclassOptionalData after a reassociation or +/// commutation. We preserve fast-math flags when applicable as they can be +/// preserved. +static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { + FPMathOperator *FPMO = dyn_cast(&I); + if (!FPMO) { + I.clearSubclassOptionalData(); + return; + } + + FastMathFlags FMF = I.getFastMathFlags(); + I.clearSubclassOptionalData(); + I.setFastMathFlags(FMF); +} + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative: // @@ -200,7 +222,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); @@ -213,7 +235,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.clearSubclassOptionalData(); I.setHasNoSignedWrap(true); } else { - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); } Changed = true; @@ -229,13 +251,13 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -251,13 +273,13 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -271,13 +293,13 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { // It simplifies to V. Form "B op V". I.setOperand(0, B); I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -298,13 +320,19 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Constant *Folded = ConstantExpr::get(Opcode, C1, C2); BinaryOperator *New = BinaryOperator::Create(Opcode, A, B); + if (isa(New)) { + FastMathFlags Flags = I.getFastMathFlags(); + Flags &= Op0->getFastMathFlags(); + Flags &= Op1->getFastMathFlags(); + New->setFastMathFlags(Flags); + } InsertNewInstWith(New, I); New->takeName(Op1); I.setOperand(0, New); I.setOperand(1, Folded); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; continue; @@ -398,7 +426,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD); + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && Op0->hasOneUse() && Op1->hasOneUse()) @@ -420,7 +448,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD); + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && Op0->hasOneUse() && Op1->hasOneUse()) @@ -442,8 +470,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { // They do! Return "L op' R". ++NumExpand; // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. @@ -451,7 +479,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) return Op0; // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) return V; // Otherwise, create a new instruction. C = Builder->CreateBinOp(InnerOpcode, L, R); @@ -467,8 +495,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { // They do! Return "L op' R". ++NumExpand; // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. @@ -476,7 +504,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) return Op1; // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) return V; // Otherwise, create a new instruction. A = Builder->CreateBinOp(InnerOpcode, L, R); @@ -510,8 +538,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { // instruction if the LHS is a constant negative zero (which is the 'negate' // form). // -Value *InstCombiner::dyn_castFNegVal(Value *V) const { - if (BinaryOperator::isFNeg(V)) +Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { + if (BinaryOperator::isFNeg(V, IgnoreZeroSign)) return BinaryOperator::getFNegArgument(V); // Constants can be considered to be negated values if they can be folded. @@ -545,9 +573,14 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, if (!ConstIsRHS) std::swap(Op0, Op1); - if (BinaryOperator *BO = dyn_cast(&I)) - return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, + if (BinaryOperator *BO = dyn_cast(&I)) { + Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, SO->getName()+".op"); + Instruction *FPInst = dyn_cast(RI); + if (FPInst && isa(FPInst)) + FPInst->copyFastMathFlags(BO); + return RI; + } if (ICmpInst *CI = dyn_cast(&I)) return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, SO->getName()+".cmp"); @@ -609,10 +642,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // uses into the PHI. if (!PN->hasOneUse()) { // Walk the use list for the instruction, comparing them to I. - for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); - UI != E; ++UI) { - Instruction *User = cast(*UI); - if (User != &I && !I.isIdenticalTo(User)) + for (User *U : PN->users()) { + Instruction *UI = cast(U); + if (UI != &I && !I.isIdenticalTo(UI)) return 0; } // Otherwise, we can replace *all* users with the new PHI we form. @@ -678,7 +710,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = 0; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) + // Beware of ConstantExpr: it may eventually evaluate to getNullValue, + // even if currently isNullValue gives false. + Constant *InC = dyn_cast(PN->getIncomingValue(i)); + if (InC && !isa(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; else InV = Builder->CreateSelect(PN->getIncomingValue(i), @@ -724,8 +759,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } } - for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); - UI != E; ) { + for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); if (User == &I) continue; ReplaceInstUsesWith(*User, NewPN); @@ -734,21 +768,27 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { return ReplaceInstUsesWith(I, NewPN); } -/// FindElementAtOffset - Given a type and a constant offset, determine whether -/// or not there is a sequence of GEP indices into the type that will land us at -/// the specified offset. If so, fill them into NewIndices and return the -/// resultant element type, otherwise return null. -Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, - SmallVectorImpl &NewIndices) { - if (!TD) return 0; - if (!Ty->isSized()) return 0; +/// FindElementAtOffset - Given a pointer type and a constant offset, determine +/// whether or not there is a sequence of GEP indices into the pointed type that +/// will land us at the specified offset. If so, fill them into NewIndices and +/// return the resultant element type, otherwise return null. +Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, + SmallVectorImpl &NewIndices) { + assert(PtrTy->isPtrOrPtrVectorTy()); + + if (!DL) + return 0; + + Type *Ty = PtrTy->getPointerElementType(); + if (!Ty->isSized()) + return 0; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + Type *IntPtrTy = DL->getIntPtrType(PtrTy); int64_t FirstIdx = 0; - if (int64_t TySize = TD->getTypeAllocSize(Ty)) { + if (int64_t TySize = DL->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; @@ -766,11 +806,11 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, // Index into the types. If we fail, set OrigBase to null. while (Offset) { // Indexing into tail padding between struct/array elements. - if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) + if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty)) return 0; if (StructType *STy = dyn_cast(Ty)) { - const StructLayout *SL = TD->getStructLayout(STy); + const StructLayout *SL = DL->getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); @@ -781,7 +821,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (ArrayType *AT = dyn_cast(Ty)) { - uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); + uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); Offset %= EltSize; @@ -1039,23 +1079,23 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Move up one level in the expression. assert(Ancestor->hasOneUse() && "Drilled down when more than one use!"); - Ancestor = Ancestor->use_back(); + Ancestor = Ancestor->user_back(); } while (1); } Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = SimplifyGEPInst(Ops, TD)) + if (Value *V = SimplifyGEPInst(Ops, DL)) return ReplaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. - if (TD) { + if (DL) { bool MadeChange = false; - Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); @@ -1067,14 +1107,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. if (SeqTy->getElementType()->isSized() && - TD->getTypeAllocSize(SeqTy->getElementType()) == 0) + DL->getTypeAllocSize(SeqTy->getElementType()) == 0) if (!isa(*I) || !cast(*I)->isNullValue()) { *I = Constant::getNullValue(IntPtrTy); MadeChange = true; } Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) { + if (IndexTy != IntPtrTy) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. @@ -1155,6 +1195,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); } + // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y)) + // The GEP pattern is emitted by the SCEV expander for certain kinds of + // pointer arithmetic. + if (DL && GEP.getNumIndices() == 1 && + match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) { + unsigned AS = GEP.getPointerAddressSpace(); + if (GEP.getType() == Builder->getInt8PtrTy(AS) && + GEP.getOperand(1)->getType()->getScalarSizeInBits() == + DL->getPointerSizeInBits(AS)) { + Operator *Index = cast(GEP.getOperand(1)); + Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); + } + } + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); PointerType *StrippedPtrTy = dyn_cast(StrippedPtr->getType()); @@ -1163,9 +1219,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!StrippedPtrTy) return 0; - if (StrippedPtr != PtrOp && - StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { - + if (StrippedPtr != PtrOp) { bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast(GEP.getOperand(1))) HasZeroPointerIndex = C->isZero(); @@ -1188,7 +1242,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst *Res = GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); - return Res; + if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) + return Res; + // Insert Res, and create an addrspacecast. + // e.g., + // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ... + // -> + // %0 = GEP i8 addrspace(1)* X, ... + // addrspacecast i8 addrspace(1)* %0 to i8* + return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType()); } if (ArrayType *XATy = @@ -1200,8 +1262,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // to an array of the same type as the destination pointer // array. Because the array type is never stepped over (there // is a leading zero) we can fold the cast into this GEP. - GEP.setOperand(0, StrippedPtr); - return &GEP; + if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) { + GEP.setOperand(0, StrippedPtr); + return &GEP; + } + // Cannot replace the base pointer directly because StrippedPtr's + // address space is different. Instead, create a new GEP followed by + // an addrspacecast. + // e.g., + // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*), + // i32 0, ... + // -> + // %0 = GEP [10 x i8] addrspace(1)* X, ... + // addrspacecast i8 addrspace(1)* %0 to i8* + SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); + Value *NewGEP = GEP.isInBounds() ? + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } } @@ -1210,29 +1288,31 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcElTy = StrippedPtrTy->getElementType(); - Type *ResElTy=cast(PtrOp->getType())->getElementType(); - if (TD && SrcElTy->isArrayTy() && - TD->getTypeAllocSize(cast(SrcElTy)->getElementType()) == - TD->getTypeAllocSize(ResElTy)) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); - Idx[1] = GEP.getOperand(1); + Type *ResElTy = PtrOp->getType()->getPointerElementType(); + if (DL && SrcElTy->isArrayTy() && + DL->getTypeAllocSize(SrcElTy->getArrayElementType()) == + DL->getTypeAllocSize(ResElTy)) { + Type *IdxType = DL->getIntPtrType(GEP.getType()); + Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + // V and GEP are both pointer types --> BitCast - return new BitCastInst(NewGEP, GEP.getType()); + if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) + return new BitCastInst(NewGEP, GEP.getType()); + return new AddrSpaceCastInst(NewGEP, GEP.getType()); } // Transform things like: // %V = mul i64 %N, 4 // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast - if (TD && ResElTy->isSized() && SrcElTy->isSized()) { + if (DL && ResElTy->isSized() && SrcElTy->isSized()) { // Check that changing the type amounts to dividing the index by a scale // factor. - uint64_t ResSize = TD->getTypeAllocSize(ResElTy); - uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy); + uint64_t ResSize = DL->getTypeAllocSize(ResElTy); + uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy); if (ResSize && SrcSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1240,7 +1320,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1251,8 +1331,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *NewGEP = GEP.isInBounds() && NSW ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) : Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName()); + // The NewGEP must be pointer typed, so must the old one -> BitCast - return new BitCastInst(NewGEP, GEP.getType()); + if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) + return new BitCastInst(NewGEP, GEP.getType()); + return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } } @@ -1261,13 +1344,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (TD && ResElTy->isSized() && SrcElTy->isSized() && + if (DL && ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. - uint64_t ResSize = TD->getTypeAllocSize(ResElTy); - uint64_t ArrayEltSize = - TD->getTypeAllocSize(cast(SrcElTy)->getElementType()); + uint64_t ResSize = DL->getTypeAllocSize(ResElTy); + uint64_t ArrayEltSize + = DL->getTypeAllocSize(SrcElTy->getArrayElementType()); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1275,7 +1358,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1283,41 +1366,47 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *Off[2]; - Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); - Off[1] = NewIdx; + Value *Off[2] = { + Constant::getNullValue(DL->getIntPtrType(GEP.getType())), + NewIdx + }; + Value *NewGEP = GEP.isInBounds() && NSW ? Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast - return new BitCastInst(NewGEP, GEP.getType()); + if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) + return new BitCastInst(NewGEP, GEP.getType()); + return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } } } } + if (!DL) + return 0; + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast(PtrOp)) { - if (TD && - !isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && + Value *Operand = BCI->getOperand(0); + PointerType *OpType = cast(Operand->getType()); + unsigned OffsetBits = DL->getPointerTypeSizeInBits(OpType); + APInt Offset(OffsetBits, 0); + if (!isa(Operand) && + GEP.accumulateConstantOffset(*DL, Offset) && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { - // Determine how much the GEP moves the pointer. - SmallVector Ops(GEP.idx_begin(), GEP.idx_end()); - int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops); - // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. - if (Offset == 0) { + if (!Offset) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa(BCI->getOperand(0)) || - isAllocationFn(BCI->getOperand(0), TLI)) { + if (isa(Operand) || isAllocationFn(Operand, TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1328,19 +1417,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return &GEP; } } - return new BitCastInst(BCI->getOperand(0), GEP.getType()); + return new BitCastInst(Operand, GEP.getType()); } // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector NewIndices; - Type *InTy = - cast(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset, NewIndices)) { + if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices); + Builder->CreateInBoundsGEP(Operand, NewIndices) : + Builder->CreateGEP(Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -1353,8 +1440,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return 0; } - - static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, const TargetLibraryInfo *TLI) { @@ -1363,9 +1448,8 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, do { Instruction *PI = Worklist.pop_back_val(); - for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE; - ++UI) { - Instruction *I = cast(*UI); + for (User *U : PI->users()) { + Instruction *I = cast(U); switch (I->getOpcode()) { default: // Give up the moment we see something we can't handle. @@ -1464,13 +1548,69 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { Module *M = II->getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), - ArrayRef(), "", II->getParent()); + None, "", II->getParent()); } return EraseInstFromFunction(MI); } return 0; } +/// \brief Move the call to free before a NULL test. +/// +/// Check if this free is accessed after its argument has been test +/// against NULL (property 0). +/// If yes, it is legal to move this call in its predecessor block. +/// +/// The move is performed only if the block containing the call to free +/// will be removed, i.e.: +/// 1. it has only one predecessor P, and P has two successors +/// 2. it contains the call and an unconditional branch +/// 3. its successor is the same as its predecessor's successor +/// +/// The profitability is out-of concern here and this function should +/// be called only if the caller knows this transformation would be +/// profitable (e.g., for code size). +static Instruction * +tryToMoveFreeBeforeNullTest(CallInst &FI) { + Value *Op = FI.getArgOperand(0); + BasicBlock *FreeInstrBB = FI.getParent(); + BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); + + // Validate part of constraint #1: Only one predecessor + // FIXME: We can extend the number of predecessor, but in that case, we + // would duplicate the call to free in each predecessor and it may + // not be profitable even for code size. + if (!PredBB) + return 0; + + // Validate constraint #2: Does this block contains only the call to + // free and an unconditional branch? + // FIXME: We could check if we can speculate everything in the + // predecessor block + if (FreeInstrBB->size() != 2) + return 0; + BasicBlock *SuccBB; + if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB))) + return 0; + + // Validate the rest of constraint #1 by matching on the pred branch. + TerminatorInst *TI = PredBB->getTerminator(); + BasicBlock *TrueBB, *FalseBB; + ICmpInst::Predicate Pred; + if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB))) + return 0; + if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) + return 0; + + // Validate constraint #3: Ensure the null case just falls through. + if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) + return 0; + assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && + "Broken CFG: missing edge from predecessor to successor"); + + FI.moveBefore(TI); + return &FI; +} Instruction *InstCombiner::visitFree(CallInst &FI) { @@ -1489,6 +1629,16 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { if (isa(Op)) return EraseInstFromFunction(FI); + // If we optimize for code size, try to move the call to free before the null + // test so that simplify cfg can remove the empty block and dead code + // elimination the branch. I.e., helps to turn something like: + // if (foo) free(foo); + // into + // free(foo); + if (MinimizeSize) + if (Instruction *I = tryToMoveFreeBeforeNullTest(FI)) + return I; + return 0; } @@ -1507,7 +1657,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } - // Cannonicalize fcmp_one -> fcmp_oeq + // Canonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), TrueDest, FalseDest)) && @@ -1523,7 +1673,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } - // Cannonicalize icmp_ne -> icmp_eq + // Canonicalize icmp_ne -> icmp_eq ICmpInst::Predicate IPred; if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), TrueDest, FalseDest)) && @@ -1957,7 +2107,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { continue; // If Filter is a subset of LFilter, i.e. every element of Filter is also // an element of LFilter, then discard LFilter. - SmallVector::iterator J = NewClauses.begin() + j; + SmallVectorImpl::iterator J = NewClauses.begin() + j; // If Filter is empty then it is a subset of LFilter. if (!FElts) { // Discard LFilter. @@ -2103,7 +2253,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { static bool AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet &Visited, InstCombiner &IC, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector Worklist; @@ -2124,15 +2274,15 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; - DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); + DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); continue; } // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa(Inst->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { - DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " + if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) { + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; @@ -2140,7 +2290,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, continue; } - if (TD) { + if (DL) { // See if we can constant fold its operands. for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e; ++i) { @@ -2149,7 +2299,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Constant*& FoldRes = FoldedConstants[CE]; if (!FoldRes) - FoldRes = ConstantFoldConstantExpression(CE, TD, TLI); + FoldRes = ConstantFoldConstantExpression(CE, DL, TLI); if (!FoldRes) FoldRes = CE; @@ -2208,7 +2358,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { MadeIRChange = false; - DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " + DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); { @@ -2216,7 +2366,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // the reachable instructions. Ignore blocks that are not reachable. Keep // track of which blocks we visit. SmallPtrSet Visited; - MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD, + MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, DL, TLI); // Do a quick scan over the function. If we find any blocks that are @@ -2253,7 +2403,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, TLI)) { - DEBUG(errs() << "IC: DCE: " << *I << '\n'); + DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); EraseInstFromFunction(*I); ++NumDeadInst; MadeIRChange = true; @@ -2262,8 +2412,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa(I->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { - DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); + if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) { + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. ReplaceInstUsesWith(*I, C); @@ -2276,12 +2426,12 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // See if we can trivially sink this instruction to a successor basic block. if (I->hasOneUse()) { BasicBlock *BB = I->getParent(); - Instruction *UserInst = cast(I->use_back()); + Instruction *UserInst = cast(*I->user_begin()); BasicBlock *UserParent; // Get the block the use occurs in. if (PHINode *PN = dyn_cast(UserInst)) - UserParent = PN->getIncomingBlock(I->use_begin().getUse()); + UserParent = PN->getIncomingBlock(*I->use_begin()); else UserParent = UserInst->getParent(); @@ -2311,13 +2461,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { std::string OrigI; #endif DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); - DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); + DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { - DEBUG(errs() << "IC: Old = " << *I << '\n' + DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); if (!I->getDebugLoc().isUnknown()) @@ -2346,7 +2496,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { EraseInstFromFunction(*I); } else { #ifndef NDEBUG - DEBUG(errs() << "IC: Mod = " << OrigI << '\n' + DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' << " New = " << *I << '\n'); #endif @@ -2367,19 +2517,44 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { return MadeIRChange; } +namespace { +class InstCombinerLibCallSimplifier : public LibCallSimplifier { + InstCombiner *IC; +public: + InstCombinerLibCallSimplifier(const DataLayout *DL, + const TargetLibraryInfo *TLI, + InstCombiner *IC) + : LibCallSimplifier(DL, TLI, UnsafeFPShrink) { + this->IC = IC; + } + + /// replaceAllUsesWith - override so that instruction replacement + /// can be defined in terms of the instruction combiner framework. + void replaceAllUsesWith(Instruction *I, Value *With) const override { + IC->ReplaceInstUsesWith(*I, With); + } +}; +} bool InstCombiner::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable(); + if (skipOptnoneFunction(F)) + return false; + + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : 0; TLI = &getAnalysis(); + // Minimizing size? + MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. IRBuilder - TheBuilder(F.getContext(), TargetFolder(TD), + TheBuilder(F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist)); Builder = &TheBuilder; - LibCallSimplifier TheSimplifier(TD, TLI); + InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this); Simplifier = &TheSimplifier; bool EverMadeChange = false;