X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FIPO%2FGlobalOpt.cpp;h=48f51bbe8fe6d88258b274027ea3290fc1b2d039;hb=b4263a6ff4f9696fc84a8f75b5d09564ff06381f;hp=c09e78e2693e1aa6f09c3769e821ce0f98a0160a;hpb=c9ae19e46520c1051b828171205b379b5a0bb6ad;p=oota-llvm.git diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c09e78e2693..48f51bbe8fe 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -21,6 +21,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -40,6 +41,7 @@ using namespace llvm; STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); @@ -53,13 +55,17 @@ STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { + struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(&ID) {} + GlobalOpt() : ModulePass(ID) { + initializeGlobalOptPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -69,12 +75,17 @@ namespace { bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, + const SmallPtrSet &PHIUsers, + const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); }; } char GlobalOpt::ID = 0; -static RegisterPass X("globalopt", "Global Variable Optimizer"); +INITIALIZE_PASS(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -84,6 +95,9 @@ namespace { /// about it. If we find out that the address of the global is taken, none of /// this info will be accurate. struct GlobalStatus { + /// isCompared - True if the global's address is used in a comparison. + bool isCompared; + /// isLoaded - True if the global is ever loaded. If the global isn't ever /// loaded it can be deleted. bool isLoaded; @@ -128,10 +142,11 @@ struct GlobalStatus { /// HasPHIUser - Set to true if this global has a user that is a PHI node. bool HasPHIUser; - - GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), - AccessingFunction(0), HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), HasPHIUser(false) {} + + GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + HasPHIUser(false) {} }; } @@ -143,7 +158,8 @@ struct GlobalStatus { static bool SafeToDestroyConstant(const Constant *C) { if (isa(C)) return false; - for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) if (const Constant *CU = dyn_cast(*UI)) { if (!SafeToDestroyConstant(CU)) return false; } else @@ -158,13 +174,18 @@ static bool SafeToDestroyConstant(const Constant *C) { /// static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, SmallPtrSet &PHIUsers) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (const ConstantExpr *CE = dyn_cast(*UI)) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast(U)) { GS.HasNonInstructionUser = true; - + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. + if (!isa(CE->getType())) return true; + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; - - } else if (const Instruction *I = dyn_cast(*UI)) { + } else if (const Instruction *I = dyn_cast(U)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); if (GS.AccessingFunction == 0) @@ -185,7 +206,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // value, not an aggregate), keep more specific information about // stores. if (GS.StoredType != GlobalStatus::isStored) { - if (const GlobalVariable *GV = dyn_cast(SI->getOperand(1))){ + if (const GlobalVariable *GV = dyn_cast( + SI->getOperand(1))) { Value *StoredVal = SI->getOperand(0); if (StoredVal == GV->getInitializer()) { if (GS.StoredType < GlobalStatus::isInitializerStored) @@ -218,18 +240,21 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa(I)) { - } else if (isa(I)) { - if (I->getOperand(1) == V) + GS.isCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast(I)) { + if (MTI->isVolatile()) return true; + if (MTI->getArgOperand(0) == V) GS.StoredType = GlobalStatus::isStored; - if (I->getOperand(2) == V) + if (MTI->getArgOperand(1) == V) GS.isLoaded = true; - } else if (isa(I)) { - assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + } else if (const MemSetInst *MSI = dyn_cast(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) return true; GS.StoredType = GlobalStatus::isStored; } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast(*UI)) { + } else if (const Constant *C = dyn_cast(U)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!SafeToDestroyConstant(C)) @@ -239,6 +264,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // Otherwise must be some other user. return true; } + } return false; } @@ -255,18 +281,18 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { } else if (ConstantVector *CP = dyn_cast(Agg)) { if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); } else if (isa(Agg)) { - if (const StructType *STy = dyn_cast(Agg->getType())) { + if (StructType *STy = dyn_cast(Agg->getType())) { if (IdxV < STy->getNumElements()) return Constant::getNullValue(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = + } else if (SequentialType *STy = dyn_cast(Agg->getType())) { return Constant::getNullValue(STy->getElementType()); } } else if (isa(Agg)) { - if (const StructType *STy = dyn_cast(Agg->getType())) { + if (StructType *STy = dyn_cast(Agg->getType())) { if (IdxV < STy->getNumElements()) return UndefValue::get(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = + } else if (SequentialType *STy = dyn_cast(Agg->getType())) { return UndefValue::get(STy->getElementType()); } @@ -301,7 +327,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit); - } else if (CE->getOpcode() == Instruction::BitCast && + } else if (CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. Changed |= CleanupConstantGlobalUsers(CE, 0); @@ -317,7 +343,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { // and will invalidate our notion of what Init is. Constant *SubInit = 0; if (!isa(GEP->getOperand(0))) { - ConstantExpr *CE = + ConstantExpr *CE = dyn_cast_or_null(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -354,7 +380,7 @@ static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast(V)) return SafeToDestroyConstant(C); - + Instruction *I = dyn_cast(V); if (!I) return false; @@ -364,15 +390,15 @@ static bool isSafeSROAElementUse(Value *V) { // Stores *to* the pointer are ok. if (StoreInst *SI = dyn_cast(I)) return SI->getOperand(0) != V; - + // Otherwise, it must be a GEP. GetElementPtrInst *GEPI = dyn_cast(I); if (GEPI == 0) return false; - + if (GEPI->getNumOperands() < 3 || !isa(GEPI->getOperand(1)) || !cast(GEPI->getOperand(1))->isNullValue()) return false; - + for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); I != E; ++I) if (!isSafeSROAElementUse(*I)) @@ -386,11 +412,11 @@ static bool isSafeSROAElementUse(Value *V) { /// static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. - if (!isa(U) && - (!isa(U) || + if (!isa(U) && + (!isa(U) || cast(U)->getOpcode() != Instruction::GetElementPtr)) return false; - + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we // don't like < 3 operand CE's, and we don't like non-constant integer // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some @@ -402,18 +428,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); ++GEPI; // Skip over the pointer index. - + // If this is a use of an array allocation, do a bit more checking for sanity. - if (const ArrayType *AT = dyn_cast(*GEPI)) { + if (ArrayType *AT = dyn_cast(*GEPI)) { uint64_t NumElements = AT->getNumElements(); ConstantInt *Idx = cast(U->getOperand(2)); - + // Check to make sure that index falls within the array. If not, // something funny is going on, so we won't do the optimization. // if (Idx->getZExtValue() >= NumElements) return false; - + // We cannot scalar repl this level of the array unless any array // sub-indices are in-range constants. In particular, consider: // A[0][i]. We cannot know that the user isn't doing invalid things like @@ -425,16 +451,16 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { GEPI != E; ++GEPI) { uint64_t NumElements; - if (const ArrayType *SubArrayTy = dyn_cast(*GEPI)) + if (ArrayType *SubArrayTy = dyn_cast(*GEPI)) NumElements = SubArrayTy->getNumElements(); - else if (const VectorType *SubVectorTy = dyn_cast(*GEPI)) + else if (VectorType *SubVectorTy = dyn_cast(*GEPI)) NumElements = SubVectorTy->getNumElements(); else { assert((*GEPI)->isStructTy() && "Indexed GEP type is not array, vector, or struct!"); continue; } - + ConstantInt *IdxVal = dyn_cast(GEPI.getOperand()); if (!IdxVal || IdxVal->getZExtValue() >= NumElements) return false; @@ -458,7 +484,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { } return true; } - + /// SRAGlobal - Perform scalar replacement of aggregates on the specified global /// variable. This opens the door for other optimizations by exposing the @@ -469,10 +495,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; - + assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); - const Type *Ty = Init->getType(); + Type *Ty = Init->getType(); std::vector NewGlobals; Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); @@ -481,8 +507,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { unsigned StartAlignment = GV->getAlignment(); if (StartAlignment == 0) StartAlignment = TD.getABITypeAlignment(GV->getType()); - - if (const StructType *STy = dyn_cast(Ty)) { + + if (StructType *STy = dyn_cast(Ty)) { NewGlobals.reserve(STy->getNumElements()); const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { @@ -496,7 +522,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -505,9 +531,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) NGV->setAlignment(NewAlign); } - } else if (const SequentialType *STy = dyn_cast(Ty)) { + } else if (SequentialType *STy = dyn_cast(Ty)) { unsigned NumElements = 0; - if (const ArrayType *ATy = dyn_cast(STy)) + if (ArrayType *ATy = dyn_cast(STy)) NumElements = ATy->getNumElements(); else NumElements = cast(STy)->getNumElements(); @@ -515,7 +541,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NumElements > 16 && GV->hasNUsesOrMore(16)) return 0; // It's not worth it. NewGlobals.reserve(NumElements); - + uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { @@ -530,7 +556,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -542,7 +568,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -570,8 +596,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { Idxs.push_back(NullInt); for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) Idxs.push_back(CE->getOperand(i)); - NewPtr = ConstantExpr::getGetElementPtr(cast(NewPtr), - &Idxs[0], Idxs.size()); + NewPtr = ConstantExpr::getGetElementPtr(cast(NewPtr), Idxs); } else { GetElementPtrInst *GEPI = cast(GEP); SmallVector Idxs; @@ -608,64 +633,71 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any +/// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. -static bool AllUsesOfValueWillTrapIfNull(Value *V, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa(*UI)) { +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + + if (isa(U)) { // Will trap. - } else if (StoreInst *SI = dyn_cast(*UI)) { + } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. } - } else if (CallInst *CI = dyn_cast(*UI)) { + } else if (const CallInst *CI = dyn_cast(U)) { if (CI->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (InvokeInst *II = dyn_cast(*UI)) { + } else if (const InvokeInst *II = dyn_cast(U)) { if (II->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (BitCastInst *CI = dyn_cast(*UI)) { + } else if (const BitCastInst *CI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; - } else if (GetElementPtrInst *GEPI = dyn_cast(*UI)) { + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; - } else if (PHINode *PN = dyn_cast(*UI)) { + } else if (const PHINode *PN = dyn_cast(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; - } else if (isa(*UI) && + } else if (isa(U) && isa(UI->getOperand(1))) { // Ignore icmp X, null } else { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; } + } return true; } /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. -static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) - if (LoadInst *LI = dyn_cast(*UI)) { - SmallPtrSet PHIs; +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + const User *U = *UI; + + if (const LoadInst *LI = dyn_cast(U)) { + SmallPtrSet PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) return false; - } else if (isa(*UI)) { + } else if (isa(U)) { // Ignore stores to the global. } else { // We don't know or understand this user, bail out. - //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; return false; } - + } return true; } @@ -682,16 +714,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { Changed = true; } } else if (isa(I) || isa(I)) { - if (I->getOperand(0) == V) { + CallSite CS(I); + if (CS.getCalledValue() == V) { // Calling through the pointer! Turn into a direct call, but be careful // that the pointer is not also being passed as an argument. - I->setOperand(0, NewV); + CS.setCalledFunction(NewV); Changed = true; bool PassedAsArg = false; - for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == V) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.getArgument(i) == V) { PassedAsArg = true; - I->setOperand(i, NewV); + CS.setArgument(i, NewV); } if (PassedAsArg) { @@ -719,8 +752,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { break; if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size())); + ConstantExpr::getGetElementPtr(NewV, Idxs)); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -742,7 +774,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { // Keep track of whether we are able to remove all the uses of the global // other than the store that defines it. bool AllNonStoreUsesGone = true; - + // Replace all uses of loads with uses of uses of the stored value. for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; @@ -765,7 +797,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { // If we get here we could have other crazy uses that are transitively // loaded. assert((isa(GlobalUser) || isa(GlobalUser) || - isa(GlobalUser)) && "Only expect load and stores!"); + isa(GlobalUser) || isa(GlobalUser)) && + "Only expect load and stores!"); } } @@ -811,12 +844,12 @@ static void ConstantPropUsersOf(Value *V) { /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, ConstantInt *NElements, TargetData* TD) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - - const Type *GlobalType; + + Type *GlobalType; if (NElements->getZExtValue() == 1) GlobalType = AllocTy; else @@ -825,14 +858,14 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - + // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. @@ -852,10 +885,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, User->replaceUsesOfWith(CI, TheBC); } } - + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, + RepValue = ConstantExpr::getBitCast(RepValue, GV->getType()->getElementType()); // If there is a comparison against null, we will insert a global bool to @@ -875,7 +908,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, SI->eraseFromParent(); continue; } - + LoadInst *LI = cast(GV->use_back()); while (!LI->use_empty()) { Use &LoadUse = LI->use_begin().getUse(); @@ -883,7 +916,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, LoadUse = RepValue; continue; } - + ICmpInst *ICI = cast(LoadUse.getUser()); // Replace the cmp X, 0 with a use of the bool value. Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); @@ -938,29 +971,31 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, /// to make sure that there are no complex uses of V. We permit simple things /// like dereferencing the pointer, but not storing through the address, unless /// it is to the specified global. -static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, - GlobalVariable *GV, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *Inst = cast(*UI); - +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + const Instruction *Inst = cast(*UI); + if (isa(Inst) || isa(Inst)) { continue; // Fine, ignore. } - - if (StoreInst *SI = dyn_cast(Inst)) { + + if (const StoreInst *SI = dyn_cast(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - - if (isa(Inst)) { + + // Must index into the array and into the struct. + if (isa(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - - if (PHINode *PN = dyn_cast(Inst)) { + + if (const PHINode *PN = dyn_cast(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. if (PHIs.insert(PN)) @@ -968,13 +1003,13 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, return false; continue; } - - if (BitCastInst *BCI = dyn_cast(Inst)) { + + if (const BitCastInst *BCI = dyn_cast(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; } - + return false; } return true; @@ -983,9 +1018,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV /// somewhere. Transform all uses of the allocation into loads from the /// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// GV. This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. -static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { while (!Alloc->use_empty()) { Instruction *U = cast(*Alloc->use_begin()); @@ -1018,7 +1053,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, continue; } } - + // Insert a load from the global, and use it instead of the malloc. Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); U->replaceUsesOfWith(Alloc, NL); @@ -1029,30 +1064,31 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, /// of a load) are simple enough to perform heap SRA on. This permits GEP's /// that index through the array and struct field, icmps of null, and PHIs. static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, - SmallPtrSet &LoadUsingPHIs, - SmallPtrSet &LoadUsingPHIsPerLoad) { + SmallPtrSet &LoadUsingPHIs, + SmallPtrSet &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { const Instruction *User = cast(*UI); - + // Comparison against null is ok. if (const ICmpInst *ICI = dyn_cast(User)) { if (!isa(ICI->getOperand(1))) return false; continue; } - + // getelementptr is also ok, but only a simple form. if (const GetElementPtrInst *GEPI = dyn_cast(User)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; - + // Otherwise the GEP is ok. continue; } - + if (const PHINode *PN = dyn_cast(User)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. @@ -1061,19 +1097,19 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, if (!LoadUsingPHIs.insert(PN)) // If we have already analyzed this PHI, then it is safe. continue; - + // Make sure all uses of the PHI are simple enough to transform. if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; - + continue; } - + // Otherwise we don't know what this is, not ok. return false; } - + return true; } @@ -1084,48 +1120,48 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet LoadUsingPHIs; SmallPtrSet LoadUsingPHIsPerLoad; - for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; - ++UI) + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) if (const LoadInst *LI = dyn_cast(*UI)) { if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; LoadUsingPHIsPerLoad.clear(); } - + // If we reach here, we know that all uses of the loads and transitive uses // (through PHI nodes) are simple enough to transform. However, we don't know - // that all inputs the to the PHI nodes are in the same equivalence sets. + // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. - for (SmallPtrSet::const_iterator I = LoadUsingPHIs.begin(), - E = LoadUsingPHIs.end(); I != E; ++I) { + for (SmallPtrSet::const_iterator I = LoadUsingPHIs.begin() + , E = LoadUsingPHIs.end(); I != E; ++I) { const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); - + // PHI of the stored value itself is ok. if (InVal == StoredVal) continue; - + if (const PHINode *InPN = dyn_cast(InVal)) { // One of the PHIs in our set is (optimistically) ok. if (LoadUsingPHIs.count(InPN)) continue; return false; } - + // Load from GV is ok. if (const LoadInst *LI = dyn_cast(InVal)) if (LI->getOperand(0) == GV) continue; - + // UNDEF? NULL? - + // Anything else is rejected. return false; } } - + return true; } @@ -1133,15 +1169,15 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { std::vector &FieldVals = InsertedScalarizedValues[V]; - + if (FieldNo >= FieldVals.size()) FieldVals.resize(FieldNo+1); - + // If we already have this value, just reuse the previously scalarized // version. if (Value *FieldVal = FieldVals[FieldNo]) return FieldVal; - + // Depending on what instruction this is, we have several cases. Value *Result; if (LoadInst *LI = dyn_cast(V)) { @@ -1154,24 +1190,26 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. - const StructType *ST = + StructType *ST = cast(cast(PN->getType())->getElementType()); - - Result = + + PHINode *NewPN = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { llvm_unreachable("Unknown usable value"); Result = 0; } - + return FieldVals[FieldNo] = Result; } /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from /// the load, rewrite the derived value to use the HeapSRoA'd load. -static void RewriteHeapSROALoadUser(Instruction *LoadUser, +static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { // If this is a comparison against null, handle it. @@ -1181,30 +1219,30 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, InsertedScalarizedValues, PHIsToRewrite); - + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), + Constant::getNullValue(NPtr->getType()), SCI->getName()); SCI->replaceAllUsesWith(New); SCI->eraseFromParent(); return; } - + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' if (GetElementPtrInst *GEPI = dyn_cast(LoadUser)) { assert(GEPI->getNumOperands() >= 3 && isa(GEPI->getOperand(2)) && "Unexpected GEPI!"); - + // Load the pointer for this field. unsigned FieldNo = cast(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, InsertedScalarizedValues, PHIsToRewrite); - + // Create the new GEP idx vector. SmallVector GEPIdx; GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - + Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx.begin(), GEPIdx.end(), GEPI->getName(), GEPI); @@ -1220,12 +1258,10 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // already been seen first by another load, so its uses have already been // processed. PHINode *PN = cast(LoadUser); - bool Inserted; - DenseMap >::iterator InsertPos; - tie(InsertPos, Inserted) = - InsertedScalarizedValues.insert(std::make_pair(PN, std::vector())); - if (!Inserted) return; - + if (!InsertedScalarizedValues.insert(std::make_pair(PN, + std::vector())).second) + return; + // If this is the first time we've seen this PHI, recursively process all // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { @@ -1238,7 +1274,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// is a value loaded from the global. Eliminate all uses of Ptr, making them /// use FieldGlobals instead. All uses of loaded values satisfy /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. -static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); @@ -1246,7 +1282,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, Instruction *User = cast(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } - + if (Load->use_empty()) { Load->eraseFromParent(); InsertedScalarizedValues.erase(Load); @@ -1258,8 +1294,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value* NElems, TargetData *TD) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); - const Type* MAT = getMallocAllocatedType(CI); - const StructType *STy = cast(MAT); + Type* MAT = getMallocAllocatedType(CI); + StructType *STy = cast(MAT); // There is guaranteed to be at least one use of the malloc (storing // it into GV). If there are other uses, change them to be uses of @@ -1271,11 +1307,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // new mallocs at the same place as CI, and N globals. std::vector FieldGlobals; std::vector FieldMallocs; - + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - const Type *FieldTy = STy->getElementType(FieldNo); - const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); - + Type *FieldTy = STy->getElementType(FieldNo); + PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, @@ -1283,19 +1319,19 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getName() + ".f" + Twine(FieldNo), GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); - if (const StructType *ST = dyn_cast(FieldTy)) + if (StructType *ST = dyn_cast(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, + NElems, 0, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); } - + // The tricky aspect of this transformation is handling the case when malloc // fails. In the original code, malloc failing would set the result pointer // of malloc to null. In this case, some mallocs could succeed and others @@ -1309,8 +1345,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // if (F2) { free(F2); F2 = 0; } // } // The malloc can also fail if its argument is too large. - Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0); - Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1), + Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), ConstantZero, "isneg"); for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], @@ -1322,23 +1358,23 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); - + // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), "malloc_ret_null", OrigBB->getParent()); - + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond // branch on RunningOr. OrigBB->getTerminator()->eraseFromParent(); BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - + // Within the NullPtrBlock, we need to emit a comparison and branch for each // pointer, because some may be null while others are not. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, Constant::getNullValue(GVVal->getType()), "tmp"); BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", @@ -1353,10 +1389,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], FreeBlock); BranchInst::Create(NextBlock, FreeBlock); - + NullPtrBlock = NextBlock; } - + BranchInst::Create(ContBB, NullPtrBlock); // CI is no longer needed, remove it. @@ -1367,28 +1403,28 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// inserted for a given load. DenseMap > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; - + std::vector > PHIsToRewrite; - + // Okay, the malloc site is completely handled. All of the uses of GV are now // loads, and all uses of those loads are simple. Rewrite them to use loads // of the per-field globals instead. for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { Instruction *User = cast(*UI++); - + if (LoadInst *LI = dyn_cast(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); continue; } - + // Must be a store of null. StoreInst *SI = cast(User); assert(isa(SI->getOperand(0)) && "Unexpected heap-sra user!"); - + // Insert a store of null into each global. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - const PointerType *PT = cast(FieldGlobals[i]->getType()); + PointerType *PT = cast(FieldGlobals[i]->getType()); Constant *Null = Constant::getNullValue(PT->getElementType()); new StoreInst(Null, FieldGlobals[i], SI); } @@ -1412,7 +1448,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } - + // Drop all inter-phi links and any loads that made it this far. for (DenseMap >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1422,7 +1458,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast(I->first)) LI->dropAllReferences(); } - + // Delete all the phis and loads now that inter-references are dead. for (DenseMap >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1432,7 +1468,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast(I->first)) LI->eraseFromParent(); } - + // The old global is now dead, remove it. GV->eraseFromParent(); @@ -1445,9 +1481,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, Module::global_iterator &GVI, TargetData *TD) { + if (!TD) + return false; + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1466,70 +1505,70 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // malloc to be stored into the specified global, loaded setcc'd, and // GEP'd. These are all things we could transform to using the global // for. - { - SmallPtrSet PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) - return false; - } + SmallPtrSet PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) + return false; // If we have a global that is only initialized with a fixed size malloc, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - if (Value *NElems = getMallocArraySize(CI, TD, true)) { - if (ConstantInt *NElements = dyn_cast(NElems)) - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); - return true; - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) - if (const ArrayType *AT = dyn_cast(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = - dyn_cast(getMallocAllocatedType(CI))) { - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); - unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); - Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); - Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); - Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, - AllocSize, NumElements, - CI->getName()); - Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); - CI->replaceAllUsesWith(Cast); - CI->eraseFromParent(); - CI = dyn_cast(Malloc) ? - extractMallocCallFromBitCast(Malloc) : cast(Malloc); - } - - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); - return true; - } + Value *NElems = getMallocArraySize(CI, TD, true); + if (!NElems) + return false; + + if (ConstantInt *NElements = dyn_cast(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); + return true; + } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) + if (ArrayType *AT = dyn_cast(AllocTy)) + AllocTy = AT->getElementType(); + + StructType *AllocSTy = dyn_cast(AllocTy); + if (!AllocSTy) + return false; + + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (ArrayType *AT = dyn_cast(getMallocAllocatedType(CI))) { + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); + Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); + Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, + AllocSize, NumElements, + 0, CI->getName()); + Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); + CI->replaceAllUsesWith(Cast); + CI->eraseFromParent(); + CI = dyn_cast(Malloc) ? + extractMallocCallFromBitCast(Malloc) : cast(Malloc); } + + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); + return true; } - + return false; -} +} // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge // that only one value (besides its initializer) is ever stored to the global. @@ -1547,15 +1586,14 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) - SOVC = - ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); + SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - const Type* MallocType = getMallocAllocatedType(CI); - if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + Type* MallocType = getMallocAllocatedType(CI); + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, GVI, TD)) return true; } @@ -1569,8 +1607,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { - const Type *GVElType = GV->getType()->getElementType(); - + Type *GVElType = GV->getType()->getElementType(); + // If GVElType is already i1, it is already shrunk. If the type of the GV is // an FP value, pointer or vector, don't do this optimization because a select // between them is very expensive and unlikely to lead to later @@ -1580,19 +1618,21 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GVElType->isFloatingPointTy() || GVElType->isPointerTy() || GVElType->isVectorTy()) return false; - + // Walk the use list of the global seeing if all the uses are load or store. // If there is anything else, bail out. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) - if (!isa(I) && !isa(I)) + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (!isa(U) && !isa(U)) return false; - + } + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); - + // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, - GlobalValue::InternalLinkage, + GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", GV->isThreadLocal()); @@ -1624,7 +1664,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // bool. Instruction *StoredVal = cast(SI->getOperand(0)); - // If we're already replaced the input, StoredVal will be a cast or + // If we've already replaced the input, StoredVal will be a cast or // select instruction. If not, it will be a load of the original // global. if (LoadInst *LI = dyn_cast(StoredVal)) { @@ -1661,10 +1701,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// ProcessInternalGlobal - Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { - SmallPtrSet PHIUsers; - GlobalStatus GS; +bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, + Module::global_iterator &GVI) { + if (!GV->hasLocalLinkage()) + return false; + + // Do more involved optimizations if the global is internal. GV->removeDeadConstantUsers(); if (GV->use_empty()) { @@ -1674,140 +1716,139 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } - if (!AnalyzeGlobal(GV, GS, PHIUsers)) { -#if 0 - DEBUG(dbgs() << "Global: " << *GV); - DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); - DEBUG(dbgs() << " StoredType = "); - switch (GS.StoredType) { - case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; - case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); - break; - case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; - case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; - } - if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); - if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName() - << "\n"); - DEBUG(dbgs() << " HasMultipleAccessingFunctions = " - << GS.HasMultipleAccessingFunctions << "\n"); - DEBUG(dbgs() << " HasNonInstructionUser = " - << GS.HasNonInstructionUser<<"\n"); - DEBUG(dbgs() << "\n"); -#endif - - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive we can make - // this global a local variable) we replace the global with a local alloca - // in this function. - // - // NOTE: It doesn't make sense to promote non single-value types since we - // are just replacing static memory to stack memory. - // - // If the global is in different address space, don't bring it to stack. - if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && - GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction& FirstI = const_cast(*GS.AccessingFunction - ->getEntryBlock().begin()); - const Type* ElemTy = GV->getType()->getElementType(); - // FIXME: Pass Global's alignment when globals have alignment - AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); - if (!isa(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, &FirstI); - - GV->replaceAllUsesWith(Alloca); + SmallPtrSet PHIUsers; + GlobalStatus GS; + + if (AnalyzeGlobal(GV, GS, PHIUsers)) + return false; + + if (!GS.isCompared && !GV->hasUnnamedAddr()) { + GV->setUnnamedAddr(true); + NumUnnamed++; + } + + if (GV->isConstant() || !GV->hasInitializer()) + return false; + + return ProcessInternalGlobal(GV, GVI, PHIUsers, GS); +} + +/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, + Module::global_iterator &GVI, + const SmallPtrSet &PHIUsers, + const GlobalStatus &GS) { + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive we can make + // this global a local variable) we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non single-value types since we + // are just replacing static memory to stack memory. + // + // If the global is in different address space, don't bring it to stack. + if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isSingleValueType() && + GS.AccessingFunction->getName() == "main" && + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + Instruction& FirstI = const_cast(*GS.AccessingFunction + ->getEntryBlock().begin()); + Type* ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + if (!isa(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.isLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, delete it. + if (GV->use_empty()) { GV->eraseFromParent(); - ++NumLocalized; - return true; + ++NumDeleted; + Changed = true; } - - // If the global is never loaded (but may be stored to), it is dead. - // Delete it now. - if (!GS.isLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); - - // Delete any stores we can find to the global. We may not be able to - // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - // If the global is dead now, delete it. - if (GV->use_empty()) { - GV->eraseFromParent(); - ++NumDeleted; - Changed = true; - } - return Changed; + return Changed; - } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); - GV->setConstant(true); + } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); + GV->setConstant(true); - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); - // If the global is dead now, just nuke it. - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + if (TargetData *TD = getAnalysisIfAvailable()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } + } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) + if (isa(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } else { + GVI = GV; + } + ++NumSubstitute; + return true; } - ++NumMarked; + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. + if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, + getAnalysisIfAvailable())) return true; - } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (TargetData *TD = getAnalysisIfAvailable()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } - } else if (GS.StoredType == GlobalStatus::isStoredOnce) { - // If the initial value for the global was an undef value, and if only - // one other value was stored into it, we can just change the - // initializer to be the stored value, then delete all stores to the - // global. This allows us to mark it constant. - if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) - if (isa(GV->getInitializer())) { - // Change the initial value here. - GV->setInitializer(SOVConstant); - - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; - } else { - GVI = GV; - } - ++NumSubstitute; - return true; - } - // Try to optimize globals based on the knowledge that only one value - // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysisIfAvailable())) + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. + if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; return true; - - // Otherwise, if the global was not a boolean, we can shrink it to be a - // boolean. - if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { - ++NumShrunkToBool; - return true; - } - } + } } + return false; } @@ -1880,7 +1921,6 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { bool GlobalOpt::OptimizeGlobalVars(Module &M) { bool Changed = false; - TargetData *TD = getAnalysisIfAvailable(); for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; @@ -1890,72 +1930,56 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { // Simplify the initializer. if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast(GV->getInitializer())) { + TargetData *TD = getAnalysisIfAvailable(); Constant *New = ConstantFoldConstantExpression(CE, TD); if (New && New != CE) GV->setInitializer(New); } - // Refine the alignment value. - if (TD && GV->hasDefinitiveInitializer()) { - unsigned Align = TD->getPreferredAlignment(GV); - if (Align > GV->getAlignment()) - GV->setAlignment(Align); - } - // Do more involved optimizations if the global is internal. - if (!GV->isConstant() && GV->hasLocalLinkage() && - GV->hasInitializer()) - Changed |= ProcessInternalGlobal(GV, GVI); + + Changed |= ProcessGlobal(GV, GVI); } return Changed; } -/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all /// initializers have an init priority of 65535. GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (I->getName() == "llvm.global_ctors") { - // Found it, verify it's an array of { int, void()* }. - const ArrayType *ATy =dyn_cast(I->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - - // Verify that the initializer is simple enough for us to handle. - if (!I->hasDefinitiveInitializer()) return 0; - ConstantArray *CA = dyn_cast(I->getInitializer()); - if (!CA) return 0; - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast(*i)) { - if (isa(CS->getOperand(1))) - continue; + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (GV == 0) return 0; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) return 0; - // Must have a function or null ptr. - if (!isa(CS->getOperand(1))) - return 0; - - // Init priority must be standard. - ConstantInt *CI = dyn_cast(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) - return 0; - } else { - return 0; - } - - return I; - } - return 0; + if (isa(GV->getInitializer())) + return GV; + ConstantArray *CA = cast(GV->getInitializer()); + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + if (isa(*i)) + continue; + ConstantStruct *CS = cast(*i); + if (isa(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa(CS->getOperand(1))) + return 0; + + // Init priority must be standard. + ConstantInt *CI = cast(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return 0; + } + + return GV; } /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, /// return a list of the functions and null terminator as a vector. static std::vector ParseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector(); ConstantArray *CA = cast(GV->getInitializer()); std::vector Result; Result.reserve(CA->getNumOperands()); @@ -1968,48 +1992,50 @@ static std::vector ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const std::vector &Ctors) { // If we made a change, reassemble the initializer list. - std::vector CSVals; - CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); - CSVals.push_back(0); - + Constant *CSVals[2]; + CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); + CSVals[1] = 0; + + StructType *StructTy = + cast ( + cast(GCL->getType()->getElementType())->getElementType()); + // Create the new init list. std::vector CAList; for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), + Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), false); - const PointerType *PFTy = PointerType::getUnqual(FTy); + PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 2147483647); + 0x7fffffff); } - CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); + CAList.push_back(ConstantStruct::get(StructTy, CSVals)); } - + // Create the array initializer. - const Type *StructTy = - cast(GCL->getType()->getElementType())->getElementType(); - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); - + // If we didn't change the number of elements, don't create a new GV. if (CA->getType() == GCL->getInitializer()->getType()) { GCL->setInitializer(CA); return GCL; } - + // Create the new global and insert it next to the existing list. GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); - + // Nuke the old list, replacing any uses with the new one. if (!GCL->use_empty()) { Constant *V = NGV; @@ -2018,7 +2044,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, GCL->replaceAllUsesWith(V); } GCL->eraseFromParent(); - + if (Ctors.size()) return NGV; else @@ -2026,17 +2052,86 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } -static Constant *getVal(DenseMap &ComputedValues, - Value *V) { +static Constant *getVal(DenseMap &ComputedValues, Value *V) { if (Constant *CV = dyn_cast(V)) return CV; Constant *R = ComputedValues[V]; assert(R && "Reference to an uncomputed value!"); return R; } +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet &SimpleConstants); + + +/// isSimpleEnoughValueToCommit - Return true if the specified constant can be +/// handled by the code generator. We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSet &SimpleConstants) { + // Simple integer, undef, constant aggregate zero, global addresses, etc are + // all supported. + if (C->getNumOperands() == 0 || isa(C) || + isa(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa(C) || isa(C) || + isa(C)) { + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + Constant *Op = cast(C->getOperand(i)); + if (!isSimpleEnoughValueToCommit(Op, SimpleConstants)) + return false; + } + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // These casts are always fine if the casted value is. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + case Instruction::Add: + // We allow simple+cst. + if (!isa(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet &SimpleConstants) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C)) return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants); +} + + /// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast of something, -/// we punt. We basically just support direct accesses to globals and GEP's of +/// enough for us to understand. In particular, if it is a cast to anything +/// other than from one pointer type to another pointer type, we punt. +/// We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't @@ -2045,23 +2140,23 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; if (GlobalVariable *GV = dyn_cast(C)) - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - return GV->hasDefinitiveInitializer(); + return GV->hasUniqueInitializer(); - if (ConstantExpr *CE = dyn_cast(C)) + if (ConstantExpr *CE = dyn_cast(C)) { // Handle a constantexpr gep. if (CE->getOpcode() == Instruction::GetElementPtr && isa(CE->getOperand(0)) && cast(CE)->isInBounds()) { GlobalVariable *GV = cast(CE->getOperand(0)); - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - if (!GV->hasDefinitiveInitializer()) + if (!GV->hasUniqueInitializer()) return false; // The first index must be zero. - ConstantInt *CI = dyn_cast(*next(CE->op_begin())); + ConstantInt *CI = dyn_cast(*llvm::next(CE->op_begin())); if (!CI || !CI->isZero()) return false; // The remaining indices must be compile-time known integers within the @@ -2070,7 +2165,18 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast(CE->getOperand(0))->hasUniqueInitializer(); } + } + return false; } @@ -2084,9 +2190,9 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, assert(Val->getType() == Init->getType() && "Type mismatch!"); return Val; } - + std::vector Elts; - if (const StructType *STy = dyn_cast(Init->getType())) { + if (StructType *STy = dyn_cast(Init->getType())) { // Break up the constant into its elements. if (ConstantStruct *CS = dyn_cast(Init)) { @@ -2102,51 +2208,48 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } - + // Replace the element that we are supposed to. ConstantInt *CU = cast(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); - + // Return the modified struct. - return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), - STy->isPacked()); + return ConstantStruct::get(STy, Elts); + } + + ConstantInt *CI = cast(Addr->getOperand(OpNo)); + SequentialType *InitTy = cast(Init->getType()); + + uint64_t NumElts; + if (ArrayType *ATy = dyn_cast(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = cast(InitTy)->getNumElements(); + + // Break up the array into elements. + if (ConstantArray *CA = dyn_cast(Init)) { + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) + Elts.push_back(cast(*i)); + } else if (ConstantVector *CV = dyn_cast(Init)) { + for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) + Elts.push_back(cast(*i)); + } else if (isa(Init)) { + Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); } else { - ConstantInt *CI = cast(Addr->getOperand(OpNo)); - const SequentialType *InitTy = cast(Init->getType()); + assert(isa(Init) && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); + } - uint64_t NumElts; - if (const ArrayType *ATy = dyn_cast(InitTy)) - NumElts = ATy->getNumElements(); - else - NumElts = cast(InitTy)->getNumElements(); - - - // Break up the array into elements. - if (ConstantArray *CA = dyn_cast(Init)) { - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (ConstantVector *CV = dyn_cast(Init)) { - for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (isa(Init)) { - Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); - } else { - assert(isa(Init) && "This code is out of sync with " - " ConstantFoldLoadThroughGEPConstantExpr"); - Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); - } - - assert(CI->getZExtValue() < NumElts); - Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - - if (Init->getType()->isArrayTy()) - return ConstantArray::get(cast(InitTy), Elts); - else - return ConstantVector::get(&Elts[0], Elts.size()); - } + assert(CI->getZExtValue() < NumElts); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast(InitTy), Elts); + return ConstantVector::get(Elts); } /// CommitValueTo - We have decided that Addr (which satisfies the predicate @@ -2172,14 +2275,14 @@ static Constant *ComputeLoadResult(Constant *P, // is the most up-to-date. DenseMap::const_iterator I = Memory.find(P); if (I != Memory.end()) return I->second; - + // Access it. if (GlobalVariable *GV = dyn_cast(P)) { if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); return 0; } - + // Handle a constantexpr getelementptr. if (ConstantExpr *CE = dyn_cast(P)) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -2199,17 +2302,19 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs, std::vector &CallStack, DenseMap &MutatedMemory, - std::vector &AllocaTmps) { + std::vector &AllocaTmps, + SmallPtrSet &SimpleConstants, + const TargetData *TD) { // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; - + CallStack.push_back(F); - + /// Values - As we compute SSA register values, we store their contents here. DenseMap Values; - + // Initialize arguments to the incoming values specified. unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; @@ -2220,21 +2325,65 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// we can only evaluate any one basic block at most once. This set keeps /// track of what we have executed so we can detect recursive cases etc. SmallPtrSet ExecutedBlocks; - + // CurInst - The current instruction we're evaluating. BasicBlock::iterator CurInst = F->begin()->begin(); - + // This is the main evaluation loop. while (1) { Constant *InstResult = 0; - + if (StoreInst *SI = dyn_cast(CurInst)) { if (SI->isVolatile()) return false; // no volatile accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. return false; + Constant *Val = getVal(Values, SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants)) + return false; + + if (ConstantExpr *CE = dyn_cast(Ptr)) + if (CE->getOpcode() == Instruction::BitCast) { + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. + Ptr = CE->getOperand(0); + + Type *NewTy=cast(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (StructType *STy = dyn_cast(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + return 0; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + } + MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), @@ -2249,8 +2398,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, getVal(Values, CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast(CurInst)) { - InstResult = - ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), getVal(Values, SI->getOperand(1)), getVal(Values, SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { @@ -2259,9 +2407,9 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) GEPOps.push_back(getVal(Values, *i)); - InstResult = cast(GEP)->isInBounds() ? - ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : - ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + InstResult = + ConstantExpr::getGetElementPtr(P, GEPOps, + cast(GEP)->isInBounds()); } else if (LoadInst *LI = dyn_cast(CurInst)) { if (LI->isVolatile()) return false; // no volatile accesses. InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), @@ -2269,12 +2417,12 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. - const Type *Ty = AI->getType()->getElementType(); + Type *Ty = AI->getType()->getElementType(); AllocaTmps.push_back(new GlobalVariable(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); - InstResult = AllocaTmps.back(); + InstResult = AllocaTmps.back(); } else if (CallInst *CI = dyn_cast(CurInst)) { // Debug info can safely be ignored here. @@ -2284,21 +2432,36 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } // Cannot handle inline asm. - if (isa(CI->getOperand(0))) return false; + if (isa(CI->getCalledValue())) return false; + + if (MemSetInst *MSI = dyn_cast(CI)) { + if (MSI->isVolatile()) return false; + Constant *Ptr = getVal(Values, MSI->getDest()); + Constant *Val = getVal(Values, MSI->getValue()); + Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr), + MutatedMemory); + if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { + // This memset is a no-op. + ++CurInst; + continue; + } + return false; + } // Resolve function pointers. - Function *Callee = dyn_cast(getVal(Values, CI->getOperand(0))); + Function *Callee = dyn_cast(getVal(Values, + CI->getCalledValue())); if (!Callee) return false; // Cannot resolve. SmallVector Formals; - for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); + CallSite CS(CI); + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) Formals.push_back(getVal(Values, *i)); if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals.data(), - Formals.size())) { + if (Constant *C = ConstantFoldCall(Callee, Formals)) { InstResult = C; } else { return false; @@ -2306,11 +2469,11 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else { if (Callee->getFunctionType()->isVarArg()) return false; - + Constant *RetVal; // Execute the call, if successful, use the return value. if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, - MutatedMemory, AllocaTmps)) + MutatedMemory, AllocaTmps, SimpleConstants, TD)) return false; InstResult = RetVal; } @@ -2324,7 +2487,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, dyn_cast(getVal(Values, BI->getCondition())); if (!Cond) return false; // Cannot determine. - NewBB = BI->getSuccessor(!Cond->getZExtValue()); + NewBB = BI->getSuccessor(!Cond->getZExtValue()); } } else if (SwitchInst *SI = dyn_cast(CurInst)) { ConstantInt *Val = @@ -2340,20 +2503,20 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else if (ReturnInst *RI = dyn_cast(CurInst)) { if (RI->getNumOperands()) RetVal = getVal(Values, RI->getOperand(0)); - + CallStack.pop_back(); // return from fn. return true; // We succeeded at evaluating this ctor! } else { // invoke, unwind, unreachable. return false; // Cannot handle this terminator. } - + // Okay, we succeeded in evaluating this control flow. See if we have // executed the new block before. If so, we have a looping function, // which we cannot evaluate in reasonable time. if (!ExecutedBlocks.insert(NewBB)) return false; // looped! - + // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. If so, evaluate them with information about where // we came from. @@ -2369,10 +2532,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, // Did not know how to evaluate this! return false; } - - if (!CurInst->use_empty()) + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, TD); + Values[CurInst] = InstResult; - + } + // Advance program counter. ++CurInst; } @@ -2380,7 +2547,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F) { +static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { /// MutatedMemory - For each store we execute, we update this map. Loads /// check this to get the most up-to-date value. If evaluation is successful, /// this state is committed to the process. @@ -2390,17 +2557,23 @@ static bool EvaluateStaticConstructor(Function *F) { /// to represent its body. This vector is needed so we can delete the /// temporary globals when we are done. std::vector AllocaTmps; - + /// CallStack - This is used to detect recursion. In pathological situations /// we could hit exponential behavior, but at least there is nothing /// unbounded. std::vector CallStack; + /// SimpleConstants - These are constants we have checked and know to be + /// simple enough to live in a static initializer of a global. + SmallPtrSet SimpleConstants; + // Call the function. Constant *RetValDummy; bool EvalSuccess = EvaluateFunction(F, RetValDummy, SmallVector(), CallStack, - MutatedMemory, AllocaTmps); + MutatedMemory, AllocaTmps, + SimpleConstants, TD); + if (EvalSuccess) { // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" @@ -2410,13 +2583,13 @@ static bool EvaluateStaticConstructor(Function *F) { E = MutatedMemory.end(); I != E; ++I) CommitValueTo(I->second, I->first); } - + // At this point, we are done interpreting. If we created any 'alloca' // temporaries, release them now. while (!AllocaTmps.empty()) { GlobalVariable *Tmp = AllocaTmps.back(); AllocaTmps.pop_back(); - + // If there are still users of the alloca, the program is doing something // silly, e.g. storing the address of the alloca somewhere and using it // later. Since this is undefined, we'll just make it be null. @@ -2424,7 +2597,7 @@ static bool EvaluateStaticConstructor(Function *F) { Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); delete Tmp; } - + return EvalSuccess; } @@ -2436,7 +2609,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { std::vector Ctors = ParseGlobalCtors(GCL); bool MadeChange = false; if (Ctors.empty()) return false; - + + const TargetData *TD = getAnalysisIfAvailable(); // Loop over global ctors, optimizing them when we can. for (unsigned i = 0; i != Ctors.size(); ++i) { Function *F = Ctors[i]; @@ -2449,12 +2623,12 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { } break; } - + // We cannot simplify external ctor functions. if (F->empty()) continue; - + // If we can evaluate the ctor at compile time, do. - if (EvaluateStaticConstructor(F)) { + if (EvaluateStaticConstructor(F, TD)) { Ctors.erase(Ctors.begin()+i); MadeChange = true; --i; @@ -2462,9 +2636,9 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { continue; } } - + if (!MadeChange) return false; - + GCL = InstallGlobalCtors(GCL, Ctors); return true; } @@ -2506,7 +2680,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { continue; // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section + // aliasee. This check also ensures that it is safe to replace the section // and other attributes of the aliasee with those of the alias. if (!hasOneUse) continue; @@ -2526,33 +2700,152 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { return Changed; } +static Function *FindCXAAtExit(Module &M) { + Function *Fn = M.getFunction("__cxa_atexit"); + + if (!Fn) + return 0; + + FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. + if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return 0; + + return Fn; +} + +/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ +/// destructor and can therefore be eliminated. +/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast(I)) { + // Ignore debug intrinsics. + if (isa(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn)) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa(*I)) + return true; + else + return false; + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (Function::use_iterator I = CXAAtExitFn->use_begin(), + E = CXAAtExitFn->use_end(); I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. + CallInst *CI = dyn_cast(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - + // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); + Function *CXAAtExitFn = FindCXAAtExit(M); + bool LocalChange = true; while (LocalChange) { LocalChange = false; - + // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M); - + // Optimize global_ctors list. if (GlobalCtors) LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); - + // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors. + if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + Changed |= LocalChange; } - + // TODO: Move all global ctors functions to the end of the module for code // layout. - + return Changed; }