X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTransforms%2FIPO%2FGlobalOpt.cpp;h=ed68101362b822c7b84088d65a2579b300305e89;hb=23ec5d7759ed9a3b52fc8c470695248a1719cce8;hp=f61dc91ee03c71e0b4d827d712d58891fd6e3708;hpb=fa1f5c22c5186d1055343aae74224a6637b831b2;p=oota-llvm.git diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index f61dc91ee03..ed68101362b 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -21,10 +21,12 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -40,6 +42,7 @@ using namespace llvm; STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); @@ -53,13 +56,18 @@ STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { + struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); } static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(&ID) {} + GlobalOpt() : ModulePass(ID) { + initializeGlobalOptPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -69,12 +77,23 @@ namespace { bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, + const SmallPtrSet &PHIUsers, + const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); + + TargetData *TD; + TargetLibraryInfo *TLI; }; } char GlobalOpt::ID = 0; -static RegisterPass X("globalopt", "Global Variable Optimizer"); +INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -84,6 +103,9 @@ namespace { /// about it. If we find out that the address of the global is taken, none of /// this info will be accurate. struct GlobalStatus { + /// isCompared - True if the global's address is used in a comparison. + bool isCompared; + /// isLoaded - True if the global is ever loaded. If the global isn't ever /// loaded it can be deleted. bool isLoaded; @@ -128,22 +150,37 @@ struct GlobalStatus { /// HasPHIUser - Set to true if this global has a user that is a PHI node. bool HasPHIUser; - - GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), - AccessingFunction(0), HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), HasPHIUser(false) {} + + /// AtomicOrdering - Set to the strongest atomic ordering requirement. + AtomicOrdering Ordering; + + GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), + HasNonInstructionUser(false), HasPHIUser(false), + Ordering(NotAtomic) {} }; } -// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used -// by constants itself. Note that constants cannot be cyclic, so this test is -// pretty easy to implement recursively. -// +/// StrongerOrdering - Return the stronger of the two ordering. If the two +/// orderings are acquire and release, then return AcquireRelease. +/// +static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) { + if (X == Acquire && Y == Release) return AcquireRelease; + if (Y == Acquire && X == Release) return AcquireRelease; + return (AtomicOrdering)std::max(X, Y); +} + +/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used +/// by constants itself. Note that constants cannot be cyclic, so this test is +/// pretty easy to implement recursively. +/// static bool SafeToDestroyConstant(const Constant *C) { if (isa(C)) return false; - for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) if (const Constant *CU = dyn_cast(*UI)) { if (!SafeToDestroyConstant(CU)) return false; } else @@ -158,13 +195,18 @@ static bool SafeToDestroyConstant(const Constant *C) { /// static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, SmallPtrSet &PHIUsers) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (const ConstantExpr *CE = dyn_cast(*UI)) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast(U)) { GS.HasNonInstructionUser = true; - + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. + if (!isa(CE->getType())) return true; + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; - - } else if (const Instruction *I = dyn_cast(*UI)) { + } else if (const Instruction *I = dyn_cast(U)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); if (GS.AccessingFunction == 0) @@ -174,18 +216,23 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, } if (const LoadInst *LI = dyn_cast(I)) { GS.isLoaded = true; - if (LI->isVolatile()) return true; // Don't hack on volatile loads. + // Don't hack on volatile loads. + if (LI->isVolatile()) return true; + GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering()); } else if (const StoreInst *SI = dyn_cast(I)) { // Don't allow a store OF the address, only stores TO the address. if (SI->getOperand(0) == V) return true; - if (SI->isVolatile()) return true; // Don't hack on volatile stores. + // Don't hack on volatile stores. + if (SI->isVolatile()) return true; + GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering()); // If this is a direct store to the global (i.e., the global is a scalar // value, not an aggregate), keep more specific information about // stores. if (GS.StoredType != GlobalStatus::isStored) { - if (const GlobalVariable *GV = dyn_cast(SI->getOperand(1))){ + if (const GlobalVariable *GV = dyn_cast( + SI->getOperand(1))) { Value *StoredVal = SI->getOperand(0); if (StoredVal == GV->getInitializer()) { if (GS.StoredType < GlobalStatus::isInitializerStored) @@ -218,18 +265,21 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa(I)) { - } else if (isa(I)) { - if (I->getOperand(1) == V) + GS.isCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast(I)) { + if (MTI->isVolatile()) return true; + if (MTI->getArgOperand(0) == V) GS.StoredType = GlobalStatus::isStored; - if (I->getOperand(2) == V) + if (MTI->getArgOperand(1) == V) GS.isLoaded = true; - } else if (isa(I)) { - assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + } else if (const MemSetInst *MSI = dyn_cast(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) return true; GS.StoredType = GlobalStatus::isStored; } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast(*UI)) { + } else if (const Constant *C = dyn_cast(U)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!SafeToDestroyConstant(C)) @@ -239,47 +289,17 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // Otherwise must be some other user. return true; } + } return false; } -static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { - ConstantInt *CI = dyn_cast(Idx); - if (!CI) return 0; - unsigned IdxV = CI->getZExtValue(); - - if (ConstantStruct *CS = dyn_cast(Agg)) { - if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); - } else if (ConstantArray *CA = dyn_cast(Agg)) { - if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); - } else if (ConstantVector *CP = dyn_cast(Agg)) { - if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); - } else if (isa(Agg)) { - if (const StructType *STy = dyn_cast(Agg->getType())) { - if (IdxV < STy->getNumElements()) - return Constant::getNullValue(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = - dyn_cast(Agg->getType())) { - return Constant::getNullValue(STy->getElementType()); - } - } else if (isa(Agg)) { - if (const StructType *STy = dyn_cast(Agg->getType())) { - if (IdxV < STy->getNumElements()) - return UndefValue::get(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = - dyn_cast(Agg->getType())) { - return UndefValue::get(STy->getElementType()); - } - } - return 0; -} - - /// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. -static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, + TargetData *TD, TargetLibraryInfo *TLI) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { User *U = *UI++; @@ -300,11 +320,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { Constant *SubInit = 0; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit); - } else if (CE->getOpcode() == Instruction::BitCast && + Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI); + } else if (CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, 0); + Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI); } if (CE->use_empty()) { @@ -317,12 +337,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { // and will invalidate our notion of what Init is. Constant *SubInit = 0; if (!isa(GEP->getOperand(0))) { - ConstantExpr *CE = - dyn_cast_or_null(ConstantFoldInstruction(GEP)); + ConstantExpr *CE = + dyn_cast_or_null(ConstantFoldInstruction(GEP, TD, TLI)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -340,7 +360,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { if (SafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. - CleanupConstantGlobalUsers(V, Init); + CleanupConstantGlobalUsers(V, Init, TD, TLI); return true; } } @@ -354,7 +374,7 @@ static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast(V)) return SafeToDestroyConstant(C); - + Instruction *I = dyn_cast(V); if (!I) return false; @@ -364,15 +384,15 @@ static bool isSafeSROAElementUse(Value *V) { // Stores *to* the pointer are ok. if (StoreInst *SI = dyn_cast(I)) return SI->getOperand(0) != V; - + // Otherwise, it must be a GEP. GetElementPtrInst *GEPI = dyn_cast(I); if (GEPI == 0) return false; - + if (GEPI->getNumOperands() < 3 || !isa(GEPI->getOperand(1)) || !cast(GEPI->getOperand(1))->isNullValue()) return false; - + for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); I != E; ++I) if (!isSafeSROAElementUse(*I)) @@ -386,11 +406,11 @@ static bool isSafeSROAElementUse(Value *V) { /// static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. - if (!isa(U) && - (!isa(U) || + if (!isa(U) && + (!isa(U) || cast(U)->getOpcode() != Instruction::GetElementPtr)) return false; - + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we // don't like < 3 operand CE's, and we don't like non-constant integer // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some @@ -402,18 +422,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); ++GEPI; // Skip over the pointer index. - + // If this is a use of an array allocation, do a bit more checking for sanity. - if (const ArrayType *AT = dyn_cast(*GEPI)) { + if (ArrayType *AT = dyn_cast(*GEPI)) { uint64_t NumElements = AT->getNumElements(); ConstantInt *Idx = cast(U->getOperand(2)); - + // Check to make sure that index falls within the array. If not, // something funny is going on, so we won't do the optimization. // if (Idx->getZExtValue() >= NumElements) return false; - + // We cannot scalar repl this level of the array unless any array // sub-indices are in-range constants. In particular, consider: // A[0][i]. We cannot know that the user isn't doing invalid things like @@ -425,16 +445,16 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { GEPI != E; ++GEPI) { uint64_t NumElements; - if (const ArrayType *SubArrayTy = dyn_cast(*GEPI)) + if (ArrayType *SubArrayTy = dyn_cast(*GEPI)) NumElements = SubArrayTy->getNumElements(); - else if (const VectorType *SubVectorTy = dyn_cast(*GEPI)) + else if (VectorType *SubVectorTy = dyn_cast(*GEPI)) NumElements = SubVectorTy->getNumElements(); else { assert((*GEPI)->isStructTy() && "Indexed GEP type is not array, vector, or struct!"); continue; } - + ConstantInt *IdxVal = dyn_cast(GEPI.getOperand()); if (!IdxVal || IdxVal->getZExtValue() >= NumElements) return false; @@ -458,7 +478,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { } return true; } - + /// SRAGlobal - Perform scalar replacement of aggregates on the specified global /// variable. This opens the door for other optimizations by exposing the @@ -469,10 +489,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; - + assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); - const Type *Ty = Init->getType(); + Type *Ty = Init->getType(); std::vector NewGlobals; Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); @@ -481,13 +501,12 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { unsigned StartAlignment = GV->getAlignment(); if (StartAlignment == 0) StartAlignment = TD.getABITypeAlignment(GV->getType()); - - if (const StructType *STy = dyn_cast(Ty)) { + + if (StructType *STy = dyn_cast(Ty)) { NewGlobals.reserve(STy->getNumElements()); const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(STy->getContext()), i)); + Constant *In = Init->getAggregateElement(i); assert(In && "Couldn't get element of initializer?"); GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, GlobalVariable::InternalLinkage, @@ -496,7 +515,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -505,9 +524,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) NGV->setAlignment(NewAlign); } - } else if (const SequentialType *STy = dyn_cast(Ty)) { + } else if (SequentialType *STy = dyn_cast(Ty)) { unsigned NumElements = 0; - if (const ArrayType *ATy = dyn_cast(STy)) + if (ArrayType *ATy = dyn_cast(STy)) NumElements = ATy->getNumElements(); else NumElements = cast(STy)->getNumElements(); @@ -515,12 +534,11 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NumElements > 16 && GV->hasNUsesOrMore(16)) return 0; // It's not worth it. NewGlobals.reserve(NumElements); - + uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { - Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(Init->getContext()), i)); + Constant *In = Init->getAggregateElement(i); assert(In && "Couldn't get element of initializer?"); GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, @@ -530,7 +548,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -542,7 +560,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -570,15 +588,14 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { Idxs.push_back(NullInt); for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) Idxs.push_back(CE->getOperand(i)); - NewPtr = ConstantExpr::getGetElementPtr(cast(NewPtr), - &Idxs[0], Idxs.size()); + NewPtr = ConstantExpr::getGetElementPtr(cast(NewPtr), Idxs); } else { GetElementPtrInst *GEPI = cast(GEP); SmallVector Idxs; Idxs.push_back(NullInt); for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); - NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), + NewPtr = GetElementPtrInst::Create(NewPtr, Idxs, GEPI->getName()+"."+Twine(Val),GEPI); } } @@ -608,64 +625,71 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any +/// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. -static bool AllUsesOfValueWillTrapIfNull(Value *V, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa(*UI)) { +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + + if (isa(U)) { // Will trap. - } else if (StoreInst *SI = dyn_cast(*UI)) { + } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. } - } else if (CallInst *CI = dyn_cast(*UI)) { + } else if (const CallInst *CI = dyn_cast(U)) { if (CI->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (InvokeInst *II = dyn_cast(*UI)) { + } else if (const InvokeInst *II = dyn_cast(U)) { if (II->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (BitCastInst *CI = dyn_cast(*UI)) { + } else if (const BitCastInst *CI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; - } else if (GetElementPtrInst *GEPI = dyn_cast(*UI)) { + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; - } else if (PHINode *PN = dyn_cast(*UI)) { + } else if (const PHINode *PN = dyn_cast(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; - } else if (isa(*UI) && + } else if (isa(U) && isa(UI->getOperand(1))) { // Ignore icmp X, null } else { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; } + } return true; } /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. -static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) - if (LoadInst *LI = dyn_cast(*UI)) { - SmallPtrSet PHIs; +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + const User *U = *UI; + + if (const LoadInst *LI = dyn_cast(U)) { + SmallPtrSet PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) return false; - } else if (isa(*UI)) { + } else if (isa(U)) { // Ignore stores to the global. } else { // We don't know or understand this user, bail out. - //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; return false; } - + } return true; } @@ -720,8 +744,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { break; if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size())); + ConstantExpr::getGetElementPtr(NewV, Idxs)); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -737,13 +760,15 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// value stored into it. If there are uses of the loaded value that would trap /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, + TargetData *TD, + TargetLibraryInfo *TLI) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global // other than the store that defines it. bool AllNonStoreUsesGone = true; - + // Replace all uses of loads with uses of uses of the stored value. for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; @@ -766,7 +791,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { // If we get here we could have other crazy uses that are transitively // loaded. assert((isa(GlobalUser) || isa(GlobalUser) || - isa(GlobalUser)) && "Only expect load and stores!"); + isa(GlobalUser) || isa(GlobalUser)) && + "Only expect load and stores!"); } } @@ -779,7 +805,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { // nor is the global. if (AllNonStoreUsesGone) { DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); - CleanupConstantGlobalUsers(GV, 0); + CleanupConstantGlobalUsers(GV, 0, TD, TLI); if (GV->use_empty()) { GV->eraseFromParent(); ++NumDeleted; @@ -791,10 +817,11 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V) { +static void ConstantPropUsersOf(Value *V, + TargetData *TD, TargetLibraryInfo *TLI) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) if (Instruction *I = dyn_cast(*UI++)) - if (Constant *NewC = ConstantFoldInstruction(I)) { + if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) { I->replaceAllUsesWith(NewC); // Advance UI to the next non-I use to avoid invalidating it! @@ -812,12 +839,13 @@ static void ConstantPropUsersOf(Value *V) { /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, ConstantInt *NElements, - TargetData* TD) { + TargetData *TD, + TargetLibraryInfo *TLI) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - - const Type *GlobalType; + + Type *GlobalType; if (NElements->getZExtValue() == 1) GlobalType = AllocTy; else @@ -826,14 +854,14 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - + // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. @@ -853,10 +881,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, User->replaceUsesOfWith(CI, TheBC); } } - + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, + RepValue = ConstantExpr::getBitCast(RepValue, GV->getType()->getElementType()); // If there is a comparison against null, we will insert a global bool to @@ -872,11 +900,12 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, while (!GV->use_empty()) { if (StoreInst *SI = dyn_cast(GV->use_back())) { // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, + SI->getOrdering(), SI->getSynchScope(), SI); SI->eraseFromParent(); continue; } - + LoadInst *LI = cast(GV->use_back()); while (!LI->use_empty()) { Use &LoadUse = LI->use_begin().getUse(); @@ -884,10 +913,13 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, LoadUse = RepValue; continue; } - + ICmpInst *ICI = cast(LoadUse.getUser()); // Replace the cmp X, 0 with a use of the bool value. - Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); + // Sink the load to where the compare was, if atomic rules allow us to. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0, + LI->getOrdering(), LI->getSynchScope(), + LI->isUnordered() ? (Instruction*)ICI : LI); InitBoolUsed = true; switch (ICI->getPredicate()) { default: llvm_unreachable("Unknown ICmp Predicate!"); @@ -928,9 +960,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // To further other optimizations, loop over all users of NewGV and try to // constant prop them. This will promote GEP instructions with constant // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV); + ConstantPropUsersOf(NewGV, TD, TLI); if (RepValue != NewGV) - ConstantPropUsersOf(RepValue); + ConstantPropUsersOf(RepValue, TD, TLI); return NewGV; } @@ -939,29 +971,31 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, /// to make sure that there are no complex uses of V. We permit simple things /// like dereferencing the pointer, but not storing through the address, unless /// it is to the specified global. -static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, - GlobalVariable *GV, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *Inst = cast(*UI); - +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + const Instruction *Inst = cast(*UI); + if (isa(Inst) || isa(Inst)) { continue; // Fine, ignore. } - - if (StoreInst *SI = dyn_cast(Inst)) { + + if (const StoreInst *SI = dyn_cast(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - - if (isa(Inst)) { + + // Must index into the array and into the struct. + if (isa(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - - if (PHINode *PN = dyn_cast(Inst)) { + + if (const PHINode *PN = dyn_cast(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. if (PHIs.insert(PN)) @@ -969,13 +1003,13 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, return false; continue; } - - if (BitCastInst *BCI = dyn_cast(Inst)) { + + if (const BitCastInst *BCI = dyn_cast(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; } - + return false; } return true; @@ -984,9 +1018,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV /// somewhere. Transform all uses of the allocation into loads from the /// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// GV. This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. -static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { while (!Alloc->use_empty()) { Instruction *U = cast(*Alloc->use_begin()); @@ -1019,7 +1053,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, continue; } } - + // Insert a load from the global, and use it instead of the malloc. Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); U->replaceUsesOfWith(Alloc, NL); @@ -1030,30 +1064,31 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, /// of a load) are simple enough to perform heap SRA on. This permits GEP's /// that index through the array and struct field, icmps of null, and PHIs. static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, - SmallPtrSet &LoadUsingPHIs, - SmallPtrSet &LoadUsingPHIsPerLoad) { + SmallPtrSet &LoadUsingPHIs, + SmallPtrSet &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { const Instruction *User = cast(*UI); - + // Comparison against null is ok. if (const ICmpInst *ICI = dyn_cast(User)) { if (!isa(ICI->getOperand(1))) return false; continue; } - + // getelementptr is also ok, but only a simple form. if (const GetElementPtrInst *GEPI = dyn_cast(User)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; - + // Otherwise the GEP is ok. continue; } - + if (const PHINode *PN = dyn_cast(User)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. @@ -1062,19 +1097,19 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, if (!LoadUsingPHIs.insert(PN)) // If we have already analyzed this PHI, then it is safe. continue; - + // Make sure all uses of the PHI are simple enough to transform. if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; - + continue; } - + // Otherwise we don't know what this is, not ok. return false; } - + return true; } @@ -1085,48 +1120,48 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet LoadUsingPHIs; SmallPtrSet LoadUsingPHIsPerLoad; - for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; - ++UI) + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) if (const LoadInst *LI = dyn_cast(*UI)) { if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; LoadUsingPHIsPerLoad.clear(); } - + // If we reach here, we know that all uses of the loads and transitive uses // (through PHI nodes) are simple enough to transform. However, we don't know - // that all inputs the to the PHI nodes are in the same equivalence sets. + // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. - for (SmallPtrSet::const_iterator I = LoadUsingPHIs.begin(), - E = LoadUsingPHIs.end(); I != E; ++I) { + for (SmallPtrSet::const_iterator I = LoadUsingPHIs.begin() + , E = LoadUsingPHIs.end(); I != E; ++I) { const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); - + // PHI of the stored value itself is ok. if (InVal == StoredVal) continue; - + if (const PHINode *InPN = dyn_cast(InVal)) { // One of the PHIs in our set is (optimistically) ok. if (LoadUsingPHIs.count(InPN)) continue; return false; } - + // Load from GV is ok. if (const LoadInst *LI = dyn_cast(InVal)) if (LI->getOperand(0) == GV) continue; - + // UNDEF? NULL? - + // Anything else is rejected. return false; } } - + return true; } @@ -1134,15 +1169,15 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { std::vector &FieldVals = InsertedScalarizedValues[V]; - + if (FieldNo >= FieldVals.size()) FieldVals.resize(FieldNo+1); - + // If we already have this value, just reuse the previously scalarized // version. if (Value *FieldVal = FieldVals[FieldNo]) return FieldVal; - + // Depending on what instruction this is, we have several cases. Value *Result; if (LoadInst *LI = dyn_cast(V)) { @@ -1155,24 +1190,25 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. - const StructType *ST = + StructType *ST = cast(cast(PN->getType())->getElementType()); - - Result = + + PHINode *NewPN = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { llvm_unreachable("Unknown usable value"); - Result = 0; } - + return FieldVals[FieldNo] = Result; } /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from /// the load, rewrite the derived value to use the HeapSRoA'd load. -static void RewriteHeapSROALoadUser(Instruction *LoadUser, +static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { // If this is a comparison against null, handle it. @@ -1182,32 +1218,31 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, InsertedScalarizedValues, PHIsToRewrite); - + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), + Constant::getNullValue(NPtr->getType()), SCI->getName()); SCI->replaceAllUsesWith(New); SCI->eraseFromParent(); return; } - + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' if (GetElementPtrInst *GEPI = dyn_cast(LoadUser)) { assert(GEPI->getNumOperands() >= 3 && isa(GEPI->getOperand(2)) && "Unexpected GEPI!"); - + // Load the pointer for this field. unsigned FieldNo = cast(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, InsertedScalarizedValues, PHIsToRewrite); - + // Create the new GEP idx vector. SmallVector GEPIdx; GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - - Value *NGEPI = GetElementPtrInst::Create(NewPtr, - GEPIdx.begin(), GEPIdx.end(), + + Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx, GEPI->getName(), GEPI); GEPI->replaceAllUsesWith(NGEPI); GEPI->eraseFromParent(); @@ -1221,12 +1256,10 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // already been seen first by another load, so its uses have already been // processed. PHINode *PN = cast(LoadUser); - bool Inserted; - DenseMap >::iterator InsertPos; - tie(InsertPos, Inserted) = - InsertedScalarizedValues.insert(std::make_pair(PN, std::vector())); - if (!Inserted) return; - + if (!InsertedScalarizedValues.insert(std::make_pair(PN, + std::vector())).second) + return; + // If this is the first time we've seen this PHI, recursively process all // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { @@ -1239,7 +1272,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// is a value loaded from the global. Eliminate all uses of Ptr, making them /// use FieldGlobals instead. All uses of loaded values satisfy /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. -static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); @@ -1247,7 +1280,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, Instruction *User = cast(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } - + if (Load->use_empty()) { Load->eraseFromParent(); InsertedScalarizedValues.erase(Load); @@ -1257,10 +1290,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value* NElems, TargetData *TD) { + Value *NElems, TargetData *TD) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); - const Type* MAT = getMallocAllocatedType(CI); - const StructType *STy = cast(MAT); + Type *MAT = getMallocAllocatedType(CI); + StructType *STy = cast(MAT); // There is guaranteed to be at least one use of the malloc (storing // it into GV). If there are other uses, change them to be uses of @@ -1272,11 +1305,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // new mallocs at the same place as CI, and N globals. std::vector FieldGlobals; std::vector FieldMallocs; - + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - const Type *FieldTy = STy->getElementType(FieldNo); - const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); - + Type *FieldTy = STy->getElementType(FieldNo); + PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, @@ -1284,19 +1317,19 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getName() + ".f" + Twine(FieldNo), GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); - if (const StructType *ST = dyn_cast(FieldTy)) + if (StructType *ST = dyn_cast(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, + NElems, 0, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); } - + // The tricky aspect of this transformation is handling the case when malloc // fails. In the original code, malloc failing would set the result pointer // of malloc to null. In this case, some mallocs could succeed and others @@ -1310,8 +1343,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // if (F2) { free(F2); F2 = 0; } // } // The malloc can also fail if its argument is too large. - Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0); - Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1), + Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), ConstantZero, "isneg"); for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], @@ -1323,25 +1356,24 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); - + // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), "malloc_ret_null", OrigBB->getParent()); - + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond // branch on RunningOr. OrigBB->getTerminator()->eraseFromParent(); BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - + // Within the NullPtrBlock, we need to emit a comparison and branch for each // pointer, because some may be null while others are not. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, - Constant::getNullValue(GVVal->getType()), - "tmp"); + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType())); BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", OrigBB->getParent()); BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", @@ -1354,10 +1386,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], FreeBlock); BranchInst::Create(NextBlock, FreeBlock); - + NullPtrBlock = NextBlock; } - + BranchInst::Create(ContBB, NullPtrBlock); // CI is no longer needed, remove it. @@ -1368,28 +1400,28 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// inserted for a given load. DenseMap > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; - + std::vector > PHIsToRewrite; - + // Okay, the malloc site is completely handled. All of the uses of GV are now // loads, and all uses of those loads are simple. Rewrite them to use loads // of the per-field globals instead. for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { Instruction *User = cast(*UI++); - + if (LoadInst *LI = dyn_cast(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); continue; } - + // Must be a store of null. StoreInst *SI = cast(User); assert(isa(SI->getOperand(0)) && "Unexpected heap-sra user!"); - + // Insert a store of null into each global. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - const PointerType *PT = cast(FieldGlobals[i]->getType()); + PointerType *PT = cast(FieldGlobals[i]->getType()); Constant *Null = Constant::getNullValue(PT->getElementType()); new StoreInst(Null, FieldGlobals[i], SI); } @@ -1413,7 +1445,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } - + // Drop all inter-phi links and any loads that made it this far. for (DenseMap >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1423,7 +1455,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast(I->first)) LI->dropAllReferences(); } - + // Delete all the phis and loads now that inter-references are dead. for (DenseMap >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1433,7 +1465,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast(I->first)) LI->eraseFromParent(); } - + // The old global is now dead, remove it. GV->eraseFromParent(); @@ -1446,9 +1478,14 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, + AtomicOrdering Ordering, Module::global_iterator &GVI, - TargetData *TD) { + TargetData *TD, + TargetLibraryInfo *TLI) { + if (!TD) + return false; + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1464,79 +1501,83 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // We can't optimize this if the malloc itself is used in a complex way, // for example, being stored into multiple globals. This allows the - // malloc to be stored into the specified global, loaded setcc'd, and + // malloc to be stored into the specified global, loaded icmp'd, and // GEP'd. These are all things we could transform to using the global // for. - { - SmallPtrSet PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) - return false; - } + SmallPtrSet PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) + return false; // If we have a global that is only initialized with a fixed size malloc, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - if (Value *NElems = getMallocArraySize(CI, TD, true)) { - if (ConstantInt *NElements = dyn_cast(NElems)) - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); - return true; - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) - if (const ArrayType *AT = dyn_cast(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = - dyn_cast(getMallocAllocatedType(CI))) { - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); - unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); - Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); - Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); - Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, - AllocSize, NumElements, - CI->getName()); - Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); - CI->replaceAllUsesWith(Cast); - CI->eraseFromParent(); - CI = dyn_cast(Malloc) ? - extractMallocCallFromBitCast(Malloc) : cast(Malloc); - } - - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); - return true; - } + Value *NElems = getMallocArraySize(CI, TD, true); + if (!NElems) + return false; + + if (ConstantInt *NElements = dyn_cast(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI); + return true; + } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + if (Ordering != NotAtomic) + return false; + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) + if (ArrayType *AT = dyn_cast(AllocTy)) + AllocTy = AT->getElementType(); + + StructType *AllocSTy = dyn_cast(AllocTy); + if (!AllocSTy) + return false; + + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (ArrayType *AT = dyn_cast(getMallocAllocatedType(CI))) { + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); + Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); + Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, + AllocSize, NumElements, + 0, CI->getName()); + Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); + CI->replaceAllUsesWith(Cast); + CI->eraseFromParent(); + CI = dyn_cast(Malloc) ? + extractMallocCallFromBitCast(Malloc) : cast(Malloc); } + + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD); + return true; } - + return false; -} +} // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge // that only one value (besides its initializer) is ever stored to the global. static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, + AtomicOrdering Ordering, Module::global_iterator &GVI, - TargetData *TD) { + TargetData *TD, TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1548,16 +1589,16 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) - SOVC = - ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); + SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - const Type* MallocType = getMallocAllocatedType(CI); - if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, - GVI, TD)) + Type *MallocType = getMallocAllocatedType(CI); + if (MallocType && + TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, + TD, TLI)) return true; } } @@ -1570,8 +1611,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { - const Type *GVElType = GV->getType()->getElementType(); - + Type *GVElType = GV->getType()->getElementType(); + // If GVElType is already i1, it is already shrunk. If the type of the GV is // an FP value, pointer or vector, don't do this optimization because a select // between them is very expensive and unlikely to lead to later @@ -1581,19 +1622,21 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GVElType->isFloatingPointTy() || GVElType->isPointerTy() || GVElType->isVectorTy()) return false; - + // Walk the use list of the global seeing if all the uses are load or store. // If there is anything else, bail out. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) - if (!isa(I) && !isa(I)) + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (!isa(U) && !isa(U)) return false; - + } + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); - + // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, - GlobalValue::InternalLinkage, + GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", GV->isThreadLocal()); @@ -1625,13 +1668,14 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // bool. Instruction *StoredVal = cast(SI->getOperand(0)); - // If we're already replaced the input, StoredVal will be a cast or + // If we've already replaced the input, StoredVal will be a cast or // select instruction. If not, it will be a load of the original // global. if (LoadInst *LI = dyn_cast(StoredVal)) { assert(LI->getOperand(0) == GV && "Not a copy!"); // Insert a new load, to preserve the saved value. - StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI); + StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0, + LI->getOrdering(), LI->getSynchScope(), LI); } else { assert((isa(StoredVal) || isa(StoredVal)) && "This is not a form that we understand!"); @@ -1639,11 +1683,13 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { assert(isa(StoreVal) && "Not a load of NewGV!"); } } - new StoreInst(StoreVal, NewGV, SI); + new StoreInst(StoreVal, NewGV, false, 0, + SI->getOrdering(), SI->getSynchScope(), SI); } else { // Change the load into a load of bool then a select. LoadInst *LI = cast(UI); - LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI); + LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0, + LI->getOrdering(), LI->getSynchScope(), LI); Value *NSI; if (IsOneZero) NSI = new ZExtInst(NLI, LI->getType(), "", LI); @@ -1660,12 +1706,14 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } -/// ProcessInternalGlobal - Analyze the specified global variable and optimize -/// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { - SmallPtrSet PHIUsers; - GlobalStatus GS; +/// ProcessGlobal - Analyze the specified global variable and optimize it if +/// possible. If we make a change, return true. +bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, + Module::global_iterator &GVI) { + if (!GV->hasLocalLinkage()) + return false; + + // Do more involved optimizations if the global is internal. GV->removeDeadConstantUsers(); if (GV->use_empty()) { @@ -1675,142 +1723,142 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } - if (!AnalyzeGlobal(GV, GS, PHIUsers)) { -#if 0 - DEBUG(dbgs() << "Global: " << *GV); - DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); - DEBUG(dbgs() << " StoredType = "); - switch (GS.StoredType) { - case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; - case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); - break; - case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; - case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; - } - if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); - if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName() - << "\n"); - DEBUG(dbgs() << " HasMultipleAccessingFunctions = " - << GS.HasMultipleAccessingFunctions << "\n"); - DEBUG(dbgs() << " HasNonInstructionUser = " - << GS.HasNonInstructionUser<<"\n"); - DEBUG(dbgs() << "\n"); -#endif - - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive we can make - // this global a local variable) we replace the global with a local alloca - // in this function. - // - // NOTE: It doesn't make sense to promote non single-value types since we - // are just replacing static memory to stack memory. - // - // If the global is in different address space, don't bring it to stack. - if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && - GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction& FirstI = const_cast(*GS.AccessingFunction - ->getEntryBlock().begin()); - const Type* ElemTy = GV->getType()->getElementType(); - // FIXME: Pass Global's alignment when globals have alignment - AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); - if (!isa(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, &FirstI); - - GV->replaceAllUsesWith(Alloca); - GV->eraseFromParent(); - ++NumLocalized; - return true; - } - - // If the global is never loaded (but may be stored to), it is dead. - // Delete it now. - if (!GS.isLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); - - // Delete any stores we can find to the global. We may not be able to - // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - // If the global is dead now, delete it. - if (GV->use_empty()) { - GV->eraseFromParent(); - ++NumDeleted; - Changed = true; - } - return Changed; - - } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); - GV->setConstant(true); + SmallPtrSet PHIUsers; + GlobalStatus GS; - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); + if (AnalyzeGlobal(GV, GS, PHIUsers)) + return false; - // If the global is dead now, just nuke it. - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; - } + if (!GS.isCompared && !GV->hasUnnamedAddr()) { + GV->setUnnamedAddr(true); + NumUnnamed++; + } - ++NumMarked; - return true; - } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (TargetData *TD = getAnalysisIfAvailable()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } - } else if (GS.StoredType == GlobalStatus::isStoredOnce) { - // If the initial value for the global was an undef value, and if only - // one other value was stored into it, we can just change the - // initializer to be the stored value, then delete all stores to the - // global. This allows us to mark it constant. - if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) - if (isa(GV->getInitializer())) { - // Change the initial value here. - GV->setInitializer(SOVConstant); - - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; - } else { - GVI = GV; - } - ++NumSubstitute; - return true; - } + if (GV->isConstant() || !GV->hasInitializer()) + return false; - // Try to optimize globals based on the knowledge that only one value - // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysisIfAvailable())) - return true; + return ProcessInternalGlobal(GV, GVI, PHIUsers, GS); +} - // Otherwise, if the global was not a boolean, we can shrink it to be a - // boolean. - if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { - ++NumShrunkToBool; - return true; - } - } +/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, + Module::global_iterator &GVI, + const SmallPtrSet &PHIUsers, + const GlobalStatus &GS) { + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive we can make + // this global a local variable) we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non single-value types since we + // are just replacing static memory to stack memory. + // + // If the global is in different address space, don't bring it to stack. + if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isSingleValueType() && + GS.AccessingFunction->getName() == "main" && + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + Instruction &FirstI = const_cast(*GS.AccessingFunction + ->getEntryBlock().begin()); + Type *ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + if (!isa(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; } - return false; -} + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.isLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), + TD, TLI); + + // If the global is dead now, delete it. + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + Changed = true; + } + return Changed; + + } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); + GV->setConstant(true); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI); + + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + if (TargetData *TD = getAnalysisIfAvailable()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } + } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) + if (isa(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } else { + GVI = GV; + } + ++NumSubstitute; + return true; + } + + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. + if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI, + TD, TLI)) + return true; + + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. + if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; + return true; + } + } + + return false; +} /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified /// function, changing them to FastCC. @@ -1850,7 +1898,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { if (!F->hasName() && !F->isDeclaration()) F->setLinkage(GlobalValue::InternalLinkage); F->removeDeadConstantUsers(); - if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) { + if (F->isDefTriviallyDead()) { F->eraseFromParent(); Changed = true; ++NumFnDeleted; @@ -1890,67 +1938,55 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { // Simplify the initializer. if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast(GV->getInitializer())) { - TargetData *TD = getAnalysisIfAvailable(); - Constant *New = ConstantFoldConstantExpression(CE, TD); + Constant *New = ConstantFoldConstantExpression(CE, TD, TLI); if (New && New != CE) GV->setInitializer(New); } - // Do more involved optimizations if the global is internal. - if (!GV->isConstant() && GV->hasLocalLinkage() && - GV->hasInitializer()) - Changed |= ProcessInternalGlobal(GV, GVI); + + Changed |= ProcessGlobal(GV, GVI); } return Changed; } -/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all /// initializers have an init priority of 65535. GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (I->getName() == "llvm.global_ctors") { - // Found it, verify it's an array of { int, void()* }. - const ArrayType *ATy =dyn_cast(I->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - - // Verify that the initializer is simple enough for us to handle. - if (!I->hasDefinitiveInitializer()) return 0; - ConstantArray *CA = dyn_cast(I->getInitializer()); - if (!CA) return 0; - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast(*i)) { - if (isa(CS->getOperand(1))) - continue; + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (GV == 0) return 0; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) return 0; - // Must have a function or null ptr. - if (!isa(CS->getOperand(1))) - return 0; - - // Init priority must be standard. - ConstantInt *CI = dyn_cast(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) - return 0; - } else { - return 0; - } - - return I; - } - return 0; + if (isa(GV->getInitializer())) + return GV; + ConstantArray *CA = cast(GV->getInitializer()); + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + if (isa(*i)) + continue; + ConstantStruct *CS = cast(*i); + if (isa(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa(CS->getOperand(1))) + return 0; + + // Init priority must be standard. + ConstantInt *CI = cast(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return 0; + } + + return GV; } /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, /// return a list of the functions and null terminator as a vector. static std::vector ParseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector(); ConstantArray *CA = cast(GV->getInitializer()); std::vector Result; Result.reserve(CA->getNumOperands()); @@ -1963,48 +1999,50 @@ static std::vector ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const std::vector &Ctors) { // If we made a change, reassemble the initializer list. - std::vector CSVals; - CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); - CSVals.push_back(0); - + Constant *CSVals[2]; + CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); + CSVals[1] = 0; + + StructType *StructTy = + cast ( + cast(GCL->getType()->getElementType())->getElementType()); + // Create the new init list. std::vector CAList; for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), + Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), false); - const PointerType *PFTy = PointerType::getUnqual(FTy); + PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 2147483647); + 0x7fffffff); } - CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); + CAList.push_back(ConstantStruct::get(StructTy, CSVals)); } - + // Create the array initializer. - const Type *StructTy = - cast(GCL->getType()->getElementType())->getElementType(); - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); - + // If we didn't change the number of elements, don't create a new GV. if (CA->getType() == GCL->getInitializer()->getType()) { GCL->setInitializer(CA); return GCL; } - + // Create the new global and insert it next to the existing list. GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); - + // Nuke the old list, replacing any uses with the new one. if (!GCL->use_empty()) { Constant *V = NGV; @@ -2013,7 +2051,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, GCL->replaceAllUsesWith(V); } GCL->eraseFromParent(); - + if (Ctors.size()) return NGV; else @@ -2021,17 +2059,89 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } -static Constant *getVal(DenseMap &ComputedValues, - Value *V) { - if (Constant *CV = dyn_cast(V)) return CV; - Constant *R = ComputedValues[V]; - assert(R && "Reference to an uncomputed value!"); - return R; +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet &SimpleConstants, + const TargetData *TD); + + +/// isSimpleEnoughValueToCommit - Return true if the specified constant can be +/// handled by the code generator. We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSet &SimpleConstants, + const TargetData *TD) { + // Simple integer, undef, constant aggregate zero, global addresses, etc are + // all supported. + if (C->getNumOperands() == 0 || isa(C) || + isa(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa(C) || isa(C) || + isa(C)) { + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + Constant *Op = cast(C->getOperand(i)); + if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD)) + return false; + } + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + // Bitcast is fine if the casted value is fine. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // int <=> ptr is fine if the int type is the same size as the + // pointer type. + if (!TD || TD->getTypeSizeInBits(CE->getType()) != + TD->getTypeSizeInBits(CE->getOperand(0)->getType())) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + + case Instruction::Add: + // We allow simple+cst. + if (!isa(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet &SimpleConstants, + const TargetData *TD) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C)) return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD); } + /// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast of something, -/// we punt. We basically just support direct accesses to globals and GEP's of +/// enough for us to understand. In particular, if it is a cast to anything +/// other than from one pointer type to another pointer type, we punt. +/// We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't @@ -2040,23 +2150,23 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; if (GlobalVariable *GV = dyn_cast(C)) - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - return GV->hasDefinitiveInitializer(); + return GV->hasUniqueInitializer(); - if (ConstantExpr *CE = dyn_cast(C)) + if (ConstantExpr *CE = dyn_cast(C)) { // Handle a constantexpr gep. if (CE->getOpcode() == Instruction::GetElementPtr && isa(CE->getOperand(0)) && cast(CE)->isInBounds()) { GlobalVariable *GV = cast(CE->getOperand(0)); - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - if (!GV->hasDefinitiveInitializer()) + if (!GV->hasUniqueInitializer()) return false; // The first index must be zero. - ConstantInt *CI = dyn_cast(*next(CE->op_begin())); + ConstantInt *CI = dyn_cast(*llvm::next(CE->op_begin())); if (!CI || !CI->isZero()) return false; // The remaining indices must be compile-time known integers within the @@ -2065,7 +2175,18 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast(CE->getOperand(0))->hasUniqueInitializer(); } + } + return false; } @@ -2079,69 +2200,43 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, assert(Val->getType() == Init->getType() && "Type mismatch!"); return Val; } - - std::vector Elts; - if (const StructType *STy = dyn_cast(Init->getType())) { + SmallVector Elts; + if (StructType *STy = dyn_cast(Init->getType())) { // Break up the constant into its elements. - if (ConstantStruct *CS = dyn_cast(Init)) { - for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (isa(Init)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Elts.push_back(Constant::getNullValue(STy->getElementType(i))); - } else if (isa(Init)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Elts.push_back(UndefValue::get(STy->getElementType(i))); - } else { - llvm_unreachable("This code is out of sync with " - " ConstantFoldLoadThroughGEPConstantExpr"); - } - + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); + // Replace the element that we are supposed to. ConstantInt *CU = cast(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); - + // Return the modified struct. - return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), - STy->isPacked()); - } else { - ConstantInt *CI = cast(Addr->getOperand(OpNo)); - const SequentialType *InitTy = cast(Init->getType()); + return ConstantStruct::get(STy, Elts); + } + + ConstantInt *CI = cast(Addr->getOperand(OpNo)); + SequentialType *InitTy = cast(Init->getType()); - uint64_t NumElts; - if (const ArrayType *ATy = dyn_cast(InitTy)) - NumElts = ATy->getNumElements(); - else - NumElts = cast(InitTy)->getNumElements(); - - - // Break up the array into elements. - if (ConstantArray *CA = dyn_cast(Init)) { - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (ConstantVector *CV = dyn_cast(Init)) { - for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (isa(Init)) { - Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); - } else { - assert(isa(Init) && "This code is out of sync with " - " ConstantFoldLoadThroughGEPConstantExpr"); - Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); - } - - assert(CI->getZExtValue() < NumElts); - Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - - if (Init->getType()->isArrayTy()) - return ConstantArray::get(cast(InitTy), Elts); - else - return ConstantVector::get(&Elts[0], Elts.size()); - } + uint64_t NumElts; + if (ArrayType *ATy = dyn_cast(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = InitTy->getVectorNumElements(); + + // Break up the array into elements. + for (uint64_t i = 0, e = NumElts; i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); + + assert(CI->getZExtValue() < NumElts); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast(InitTy), Elts); + return ConstantVector::get(Elts); } /// CommitValueTo - We have decided that Addr (which satisfies the predicate @@ -2158,23 +2253,113 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); } +/// Evaluate - This class evaluates LLVM IR, producing the Constant representing +/// each SSA instruction. Changes to global variables are stored in a mapping +/// that can be iterated over after the evaluation is complete. Once an +/// evaluation call fails, the evaluation object should not be reused. +class Evaluate { +public: + Evaluate(const TargetData *TD, const TargetLibraryInfo *TLI) + : TD(TD), TLI(TLI) { + ValueStack.push_back(new DenseMap); + } + + ~Evaluate() { + DeleteContainerPointers(ValueStack); + while (!AllocaTmps.empty()) { + GlobalVariable *Tmp = AllocaTmps.back(); + AllocaTmps.pop_back(); + + // If there are still users of the alloca, the program is doing something + // silly, e.g. storing the address of the alloca somewhere and using it + // later. Since this is undefined, we'll just make it be null. + if (!Tmp->use_empty()) + Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); + delete Tmp; + } + } + + /// EvaluateFunction - Evaluate a call to function F, returning true if + /// successful, false if we can't evaluate it. ActualArgs contains the formal + /// arguments for the function. + bool EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl &ActualArgs); + + /// EvaluateBlock - Evaluate all instructions in block BB, returning true if + /// successful, false if we can't evaluate it. NewBB returns the next BB that + /// control flows into, or null upon return. + bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB); + + Constant *getVal(Value *V) { + if (Constant *CV = dyn_cast(V)) return CV; + Constant *R = ValueStack.back()->lookup(V); + assert(R && "Reference to an uncomputed value!"); + return R; + } + + void setVal(Value *V, Constant *C) { + ValueStack.back()->operator[](V) = C; + } + + const DenseMap &getMutatedMemory() const { + return MutatedMemory; + } + + const SmallPtrSet &getInvariants() const { + return Invariants; + } + +private: + Constant *ComputeLoadResult(Constant *P); + + /// ValueStack - As we compute SSA register values, we store their contents + /// here. The back of the vector contains the current function and the stack + /// contains the values in the calling frames. + SmallVector*, 4> ValueStack; + + /// CallStack - This is used to detect recursion. In pathological situations + /// we could hit exponential behavior, but at least there is nothing + /// unbounded. + SmallVector CallStack; + + /// MutatedMemory - For each store we execute, we update this map. Loads + /// check this to get the most up-to-date value. If evaluation is successful, + /// this state is committed to the process. + DenseMap MutatedMemory; + + /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable + /// to represent its body. This vector is needed so we can delete the + /// temporary globals when we are done. + SmallVector AllocaTmps; + + /// Invariants - These global variables have been marked invariant by the + /// static constructor. + SmallPtrSet Invariants; + + /// SimpleConstants - These are constants we have checked and know to be + /// simple enough to live in a static initializer of a global. + SmallPtrSet SimpleConstants; + + const TargetData *TD; + const TargetLibraryInfo *TLI; +}; + /// ComputeLoadResult - Return the value that would be computed by a load from /// P after the stores reflected by 'memory' have been performed. If we can't /// decide, return null. -static Constant *ComputeLoadResult(Constant *P, - const DenseMap &Memory) { +Constant *Evaluate::ComputeLoadResult(Constant *P) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. - DenseMap::const_iterator I = Memory.find(P); - if (I != Memory.end()) return I->second; - + DenseMap::const_iterator I = MutatedMemory.find(P); + if (I != MutatedMemory.end()) return I->second; + // Access it. if (GlobalVariable *GV = dyn_cast(P)) { if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); return 0; } - + // Handle a constantexpr getelementptr. if (ConstantExpr *CE = dyn_cast(P)) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -2187,113 +2372,165 @@ static Constant *ComputeLoadResult(Constant *P, return 0; // don't know how to evaluate. } -/// EvaluateFunction - Evaluate a call to function F, returning true if -/// successful, false if we can't evaluate it. ActualArgs contains the formal -/// arguments for the function. -static bool EvaluateFunction(Function *F, Constant *&RetVal, - const SmallVectorImpl &ActualArgs, - std::vector &CallStack, - DenseMap &MutatedMemory, - std::vector &AllocaTmps) { - // Check to see if this function is already executing (recursion). If so, - // bail out. TODO: we might want to accept limited recursion. - if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) - return false; - - CallStack.push_back(F); - - /// Values - As we compute SSA register values, we store their contents here. - DenseMap Values; - - // Initialize arguments to the incoming values specified. - unsigned ArgNo = 0; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; - ++AI, ++ArgNo) - Values[AI] = ActualArgs[ArgNo]; - - /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, - /// we can only evaluate any one basic block at most once. This set keeps - /// track of what we have executed so we can detect recursive cases etc. - SmallPtrSet ExecutedBlocks; - - // CurInst - The current instruction we're evaluating. - BasicBlock::iterator CurInst = F->begin()->begin(); - +/// EvaluateBlock - Evaluate all instructions in block BB, returning true if +/// successful, false if we can't evaluate it. NewBB returns the next BB that +/// control flows into, or null upon return. +bool Evaluate::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB){ // This is the main evaluation loop. while (1) { Constant *InstResult = 0; - + if (StoreInst *SI = dyn_cast(CurInst)) { - if (SI->isVolatile()) return false; // no volatile accesses. - Constant *Ptr = getVal(Values, SI->getOperand(1)); + if (!SI->isSimple()) return false; // no volatile/atomic accesses. + Constant *Ptr = getVal(SI->getOperand(1)); if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. return false; - Constant *Val = getVal(Values, SI->getOperand(0)); + + Constant *Val = getVal(SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) + return false; + + if (ConstantExpr *CE = dyn_cast(Ptr)) + if (CE->getOpcode() == Instruction::BitCast) { + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. + Ptr = CE->getOperand(0); + + Type *NewTy = cast(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (StructType *STy = dyn_cast(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + return false; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + } + MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), - getVal(Values, BO->getOperand(0)), - getVal(Values, BO->getOperand(1))); + getVal(BO->getOperand(0)), + getVal(BO->getOperand(1))); } else if (CmpInst *CI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getCompare(CI->getPredicate(), - getVal(Values, CI->getOperand(0)), - getVal(Values, CI->getOperand(1))); + getVal(CI->getOperand(0)), + getVal(CI->getOperand(1))); } else if (CastInst *CI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getCast(CI->getOpcode(), - getVal(Values, CI->getOperand(0)), + getVal(CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast(CurInst)) { - InstResult = - ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), - getVal(Values, SI->getOperand(1)), - getVal(Values, SI->getOperand(2))); + InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), + getVal(SI->getOperand(1)), + getVal(SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { - Constant *P = getVal(Values, GEP->getOperand(0)); + Constant *P = getVal(GEP->getOperand(0)); SmallVector GEPOps; for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) - GEPOps.push_back(getVal(Values, *i)); - InstResult = cast(GEP)->isInBounds() ? - ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : - ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + GEPOps.push_back(getVal(*i)); + InstResult = + ConstantExpr::getGetElementPtr(P, GEPOps, + cast(GEP)->isInBounds()); } else if (LoadInst *LI = dyn_cast(CurInst)) { - if (LI->isVolatile()) return false; // no volatile accesses. - InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), - MutatedMemory); + if (!LI->isSimple()) return false; // no volatile/atomic accesses. + InstResult = ComputeLoadResult(getVal(LI->getOperand(0))); if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. - const Type *Ty = AI->getType()->getElementType(); + Type *Ty = AI->getType()->getElementType(); AllocaTmps.push_back(new GlobalVariable(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); - InstResult = AllocaTmps.back(); - } else if (CallInst *CI = dyn_cast(CurInst)) { + InstResult = AllocaTmps.back(); + } else if (isa(CurInst) || isa(CurInst)) { + CallSite CS(CurInst); // Debug info can safely be ignored here. - if (isa(CI)) { + if (isa(CS.getInstruction())) { ++CurInst; continue; } // Cannot handle inline asm. - if (isa(CI->getOperand(0))) return false; + if (isa(CS.getCalledValue())) return false; + + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + if (MemSetInst *MSI = dyn_cast(II)) { + if (MSI->isVolatile()) return false; + Constant *Ptr = getVal(MSI->getDest()); + Constant *Val = getVal(MSI->getValue()); + Constant *DestVal = ComputeLoadResult(getVal(Ptr)); + if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { + // This memset is a no-op. + ++CurInst; + continue; + } + } + + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + ++CurInst; + continue; + } + + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + // We don't insert an entry into Values, as it doesn't have a + // meaningful return value. + if (!II->use_empty()) + return false; + ConstantInt *Size = cast(II->getArgOperand(0)); + if (Size->isAllOnesValue()) { + Value *PtrArg = getVal(II->getArgOperand(1)); + Value *Ptr = PtrArg->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast(Ptr)) + Invariants.insert(GV); + } + // Continue even if we do nothing. + ++CurInst; + continue; + } + return false; + } // Resolve function pointers. - Function *Callee = dyn_cast(getVal(Values, CI->getOperand(0))); - if (!Callee) return false; // Cannot resolve. + Function *Callee = dyn_cast(getVal(CS.getCalledValue())); + if (!Callee || Callee->mayBeOverridden()) + return false; // Cannot resolve. SmallVector Formals; - for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); - i != e; ++i) - Formals.push_back(getVal(Values, *i)); + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) + Formals.push_back(getVal(*i)); if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals.data(), - Formals.size())) { + if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { InstResult = C; } else { return false; @@ -2301,137 +2538,166 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else { if (Callee->getFunctionType()->isVarArg()) return false; - + Constant *RetVal; // Execute the call, if successful, use the return value. - if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, - MutatedMemory, AllocaTmps)) + ValueStack.push_back(new DenseMap); + if (!EvaluateFunction(Callee, RetVal, Formals)) return false; + ValueStack.pop_back(); InstResult = RetVal; + + if (InvokeInst *II = dyn_cast(CurInst)) { + NextBB = II->getNormalDest(); + return true; + } } } else if (isa(CurInst)) { - BasicBlock *NewBB = 0; if (BranchInst *BI = dyn_cast(CurInst)) { if (BI->isUnconditional()) { - NewBB = BI->getSuccessor(0); + NextBB = BI->getSuccessor(0); } else { ConstantInt *Cond = - dyn_cast(getVal(Values, BI->getCondition())); + dyn_cast(getVal(BI->getCondition())); if (!Cond) return false; // Cannot determine. - NewBB = BI->getSuccessor(!Cond->getZExtValue()); + NextBB = BI->getSuccessor(!Cond->getZExtValue()); } } else if (SwitchInst *SI = dyn_cast(CurInst)) { ConstantInt *Val = - dyn_cast(getVal(Values, SI->getCondition())); + dyn_cast(getVal(SI->getCondition())); if (!Val) return false; // Cannot determine. - NewBB = SI->getSuccessor(SI->findCaseValue(Val)); + unsigned ValTISucc = SI->resolveSuccessorIndex(SI->findCaseValue(Val)); + NextBB = SI->getSuccessor(ValTISucc); } else if (IndirectBrInst *IBI = dyn_cast(CurInst)) { - Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts(); + Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); if (BlockAddress *BA = dyn_cast(Val)) - NewBB = BA->getBasicBlock(); + NextBB = BA->getBasicBlock(); else return false; // Cannot determine. - } else if (ReturnInst *RI = dyn_cast(CurInst)) { - if (RI->getNumOperands()) - RetVal = getVal(Values, RI->getOperand(0)); - - CallStack.pop_back(); // return from fn. - return true; // We succeeded at evaluating this ctor! + } else if (isa(CurInst)) { + NextBB = 0; } else { - // invoke, unwind, unreachable. + // invoke, unwind, resume, unreachable. return false; // Cannot handle this terminator. } - - // Okay, we succeeded in evaluating this control flow. See if we have - // executed the new block before. If so, we have a looping function, - // which we cannot evaluate in reasonable time. - if (!ExecutedBlocks.insert(NewBB)) - return false; // looped! - - // Okay, we have never been in this block before. Check to see if there - // are any PHI nodes. If so, evaluate them with information about where - // we came from. - BasicBlock *OldBB = CurInst->getParent(); - CurInst = NewBB->begin(); - PHINode *PN; - for (; (PN = dyn_cast(CurInst)); ++CurInst) - Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); - - // Do NOT increment CurInst. We know that the terminator had no value. - continue; + + // We succeeded at evaluating this block! + return true; } else { // Did not know how to evaluate this! return false; } - - if (!CurInst->use_empty()) - Values[CurInst] = InstResult; - + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, TD, TLI); + + setVal(CurInst, InstResult); + } + // Advance program counter. ++CurInst; } } -/// EvaluateStaticConstructor - Evaluate static constructors in the function, if -/// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F) { - /// MutatedMemory - For each store we execute, we update this map. Loads - /// check this to get the most up-to-date value. If evaluation is successful, - /// this state is committed to the process. - DenseMap MutatedMemory; +/// EvaluateFunction - Evaluate a call to function F, returning true if +/// successful, false if we can't evaluate it. ActualArgs contains the formal +/// arguments for the function. +bool Evaluate::EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl &ActualArgs) { + // Check to see if this function is already executing (recursion). If so, + // bail out. TODO: we might want to accept limited recursion. + if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) + return false; - /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable - /// to represent its body. This vector is needed so we can delete the - /// temporary globals when we are done. - std::vector AllocaTmps; - - /// CallStack - This is used to detect recursion. In pathological situations - /// we could hit exponential behavior, but at least there is nothing - /// unbounded. - std::vector CallStack; + CallStack.push_back(F); + + // Initialize arguments to the incoming values specified. + unsigned ArgNo = 0; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++ArgNo) + setVal(AI, ActualArgs[ArgNo]); + + // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, + // we can only evaluate any one basic block at most once. This set keeps + // track of what we have executed so we can detect recursive cases etc. + SmallPtrSet ExecutedBlocks; + + // CurBB - The current basic block we're evaluating. + BasicBlock *CurBB = F->begin(); + + BasicBlock::iterator CurInst = CurBB->begin(); + while (1) { + BasicBlock *NextBB; + if (!EvaluateBlock(CurInst, NextBB)) + return false; + + if (NextBB == 0) { + // Successfully running until there's no next block means that we found + // the return. Fill it the return value and pop the call stack. + ReturnInst *RI = cast(CurBB->getTerminator()); + if (RI->getNumOperands()) + RetVal = getVal(RI->getOperand(0)); + CallStack.pop_back(); + return true; + } + + // Okay, we succeeded in evaluating this control flow. See if we have + // executed the new block before. If so, we have a looping function, + // which we cannot evaluate in reasonable time. + if (!ExecutedBlocks.insert(NextBB)) + return false; // looped! + + // Okay, we have never been in this block before. Check to see if there + // are any PHI nodes. If so, evaluate them with information about where + // we came from. + PHINode *PN = 0; + for (CurInst = NextBB->begin(); + (PN = dyn_cast(CurInst)); ++CurInst) + setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); + + // Advance to the next block. + CurBB = NextBB; + } +} + +/// EvaluateStaticConstructor - Evaluate static constructors in the function, if +/// we can. Return true if we can, false otherwise. +static bool EvaluateStaticConstructor(Function *F, const TargetData *TD, + const TargetLibraryInfo *TLI) { // Call the function. + Evaluate Eval(TD, TLI); Constant *RetValDummy; - bool EvalSuccess = EvaluateFunction(F, RetValDummy, - SmallVector(), CallStack, - MutatedMemory, AllocaTmps); + bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, + SmallVector()); + if (EvalSuccess) { // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" - << F->getName() << "' to " << MutatedMemory.size() + << F->getName() << "' to " << Eval.getMutatedMemory().size() << " stores.\n"); - for (DenseMap::iterator I = MutatedMemory.begin(), - E = MutatedMemory.end(); I != E; ++I) + for (DenseMap::const_iterator I = + Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end(); + I != E; ++I) CommitValueTo(I->second, I->first); + for (SmallPtrSet::const_iterator I = + Eval.getInvariants().begin(), E = Eval.getInvariants().end(); + I != E; ++I) + (*I)->setConstant(true); } - - // At this point, we are done interpreting. If we created any 'alloca' - // temporaries, release them now. - while (!AllocaTmps.empty()) { - GlobalVariable *Tmp = AllocaTmps.back(); - AllocaTmps.pop_back(); - - // If there are still users of the alloca, the program is doing something - // silly, e.g. storing the address of the alloca somewhere and using it - // later. Since this is undefined, we'll just make it be null. - if (!Tmp->use_empty()) - Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); - delete Tmp; - } - + return EvalSuccess; } - - /// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. /// Return true if anything changed. bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { std::vector Ctors = ParseGlobalCtors(GCL); bool MadeChange = false; if (Ctors.empty()) return false; - + // Loop over global ctors, optimizing them when we can. for (unsigned i = 0; i != Ctors.size(); ++i) { Function *F = Ctors[i]; @@ -2444,12 +2710,12 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { } break; } - + // We cannot simplify external ctor functions. if (F->empty()) continue; - + // If we can evaluate the ctor at compile time, do. - if (EvaluateStaticConstructor(F)) { + if (EvaluateStaticConstructor(F, TD, TLI)) { Ctors.erase(Ctors.begin()+i); MadeChange = true; --i; @@ -2457,9 +2723,9 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { continue; } } - + if (!MadeChange) return false; - + GCL = InstallGlobalCtors(GCL, Ctors); return true; } @@ -2501,7 +2767,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { continue; // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section + // aliasee. This check also ensures that it is safe to replace the section // and other attributes of the aliasee with those of the alias. if (!hasOneUse) continue; @@ -2521,33 +2787,159 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { return Changed; } +static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::cxa_atexit)) + return 0; + + Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); + + if (!Fn) + return 0; + + FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. + if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return 0; + + return Fn; +} + +/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ +/// destructor and can therefore be eliminated. +/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and +/// other side-effect free instructions. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast(I)) { + // Ignore debug intrinsics. + if (isa(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn)) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa(*I)) + return true; // We're done. + else if (I->mayHaveSideEffects()) + return false; // Destructor with side effects, bail. + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (Function::use_iterator I = CXAAtExitFn->use_begin(), + E = CXAAtExitFn->use_end(); I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. + CallInst *CI = dyn_cast(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - + + TD = getAnalysisIfAvailable(); + TLI = &getAnalysis(); + // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); + Function *CXAAtExitFn = FindCXAAtExit(M, TLI); + bool LocalChange = true; while (LocalChange) { LocalChange = false; - + // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M); - + // Optimize global_ctors list. if (GlobalCtors) LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); - + // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors. + if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + Changed |= LocalChange; } - + // TODO: Move all global ctors functions to the end of the module for code // layout. - + return Changed; }