#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
  initializeGlobalOptPass(*PassRegistry::getPassRegistry());
}
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
-
+
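+  /// Return true if the memory pointed to by GV is provably dead on entry
+  /// to F, i.e. every load of GV in F is dominated by a store to GV that
+  /// covers at least as many bits as the load.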
+ bool isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV);
+
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
return ProcessInternalGlobal(GV, GVI, GS);
}
+bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F,
+                                                    GlobalValue *GV) {
+  // Find all uses of GV. We expect them all to be in F; if any use cannot
+  // be identified as a load or store, we bail out.
+ //
+ // On each of these uses, identify if the memory that GV points to is
+ // used/required/live at the start of the function. If it is not, for example
+ // if the first thing the function does is store to the GV, the GV can
+ // possibly be demoted.
+ //
+  // We don't do an exhaustive search for memory operations; we simply look
+  // through bitcasts, as they're quite common and benign.
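+  //
+  // For example (illustrative IR, not part of this patch):
+  //
+  //   define internal i32 @f() norecurse {
+  //     store i32 4, i32* @g   ; dominates (and covers) every load of @g
+  //     %v = load i32, i32* @g ; cannot observe @g's initializer
+  //     ret i32 %v
+  //   }
+  //
+  // Here @g's value on entry to @f is dead, so @g can be demoted to an
+  // alloca in @f.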
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ SmallVector<LoadInst *, 4> Loads;
+ SmallVector<StoreInst *, 4> Stores;
+ for (auto *U : GV->users()) {
+ if (Operator::getOpcode(U) == Instruction::BitCast) {
+ for (auto *UU : U->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(UU))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(UU))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+ continue;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ assert(I->getParent()->getParent() == F);
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+
+  // We have now sorted all uses of GV into loads and stores. Check that none
+  // of them depend on the value of the global at the function entry point,
+  // by ensuring that every load is dominated by at least one store.
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
+ .getDomTree();
+
+ for (auto *L : Loads) {
+ auto *LTy = L->getType();
+ if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) {
+ auto *STy = S->getValueOperand()->getType();
+          // The store kills the load only if it dominates the load and
+          // writes at least as many bits as the load reads.
+ return DT.dominates(S, L) &&
+ DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
+ }))
+ return false;
+ }
+ // All loads have known dependences inside F, so the global can be localized.
+ return true;
+}
+
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
const GlobalStatus &GS) {
auto &DL = GV->getParent()->getDataLayout();
- // If this is a first class global and has only one accessing function
- // and this function is main (which we know is not recursive), we replace
- // the global with a local alloca in this function.
+ // If this is a first class global and has only one accessing function and
+ // this function is non-recursive, we replace the global with a local alloca
+ // in this function.
//
// NOTE: It doesn't make sense to promote non-single-value types since we
// are just replacing static memory with stack memory.
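+  // For example, an i32 or a pointer is worth localizing, but demoting a
+  // [256 x i32] array would only move its storage to the stack without
+  // making it any easier to optimize.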
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction && !GS.HasNonInstructionUser &&
GV->getType()->getElementType()->isSingleValueType() &&
- GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage() &&
- GV->getType()->getAddressSpace() == 0) {
+ GV->getType()->getAddressSpace() == 0 &&
+ !GV->isExternallyInitialized() &&
+ GS.AccessingFunction->doesNotRecurse() &&
+      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV)) {
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
--- /dev/null
+; RUN: opt -globalopt -S < %s | FileCheck %s
+
+@G1 = internal global i32 5
+@G2 = internal global i32 5
+@G3 = internal global i32 5
+@G4 = internal global i32 5
+@G5 = internal global i32 5
+
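+; The simplest case: the store dominates the load, so @G1's initial value
+; is dead on entry and the global can be demoted to an alloca.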
+; CHECK-LABEL: @test1
+define internal i32 @test1() norecurse {
+; CHECK-NOT: @G1
+ store i32 4, i32* @G1
+ %a = load i32, i32* @G1
+; CHECK: ret
+ ret i32 %a
+}
+
+; The load comes before the store, which makes @G2 live on entry to the
+; function, so it must not be demoted.
+; CHECK-LABEL: @test2
+define internal i32 @test2() norecurse {
+; CHECK-NOT: %G2
+ %a = load i32, i32* @G2
+ store i32 4, i32* @G2
+; CHECK: ret
+ ret i32 %a
+}
+
+; This global is accessed through a GEP, which we conservatively treat as a
+; possible partial alias, so we bail out.
+; FIXME: We don't actually have to bail out in this case.
+
+; CHECK-LABEL: @test3
+define internal i32 @test3() norecurse {
+; CHECK-NOT: %G3
+  %x = getelementptr i32, i32* @G3, i32 0
+ %a = load i32, i32* %x
+ store i32 4, i32* @G3
+; CHECK: ret
+ ret i32 %a
+}
+
+; The global is cast to a larger type and then loaded. The store only
+; partially covers the load, so we must not demote.
+
+; CHECK-LABEL: @test4
+define internal i32 @test4() norecurse {
+; CHECK-NOT: %G4
+ store i32 4, i32* @G4
+ %x = bitcast i32* @G4 to i64*
+ %a = load i64, i64* %x
+ %b = trunc i64 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
+; The global is cast to a smaller type and then loaded. The store fully
+; covers the load, so this one is fine.
+
+; CHECK-LABEL: @test5
+define internal i32 @test5() norecurse {
+; CHECK-NOT: @G5
+ store i32 4, i32* @G5
+ %x = bitcast i32* @G5 to i16*
+ %a = load i16, i16* %x
+ %b = zext i16 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
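+; @main references every test function so the internal definitions are not
+; treated as dead and removed before their globals are considered.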
+define i32 @main() norecurse {
+ %a = call i32 @test1()
+ %b = call i32 @test2()
+ %c = call i32 @test3()
+ %d = call i32 @test4()
+ %e = call i32 @test5()
+
+ %x = or i32 %a, %b
+ %y = or i32 %x, %c
+ %z = or i32 %y, %d
+ %w = or i32 %z, %e
+ ret i32 %w
+}