From: Jiangning Liu Date: Thu, 15 May 2014 23:45:42 +0000 (+0000) Subject: Implement global merge optimization for global variables. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d5db8765d61ca77a55867cf1f39aecb8cae3a6cd;p=oota-llvm.git Implement global merge optimization for global variables. This commit implements two command line switches -global-merge-on-external and -global-merge-aligned, and both of them are false by default, so this optimization is disabled by default for all targets. For ARM64, some back-end behaviors need to be tuned to get this optimization further enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208934 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h index 5aa42612562..7aa07149495 100644 --- a/include/llvm/IR/GlobalAlias.h +++ b/include/llvm/IR/GlobalAlias.h @@ -82,6 +82,10 @@ public: static inline bool classof(const Value *V) { return V->getValueID() == Value::GlobalAliasVal; } + + // return the constant offset of an expression, with which this global var + // has alias. + uint64_t calculateOffset(const DataLayout &DL) const; }; template <> diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index dc2c74a835b..bd81f8e4242 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -32,6 +32,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetMachine.h" @@ -883,6 +884,12 @@ public: return 0; } + /// Returns the alignment required by global merge on external symbols. + /// By default, returns the natural alignment of merged data structure. 
+ virtual unsigned getGlobalMergeAlignment(StructType *MergedTy) const { + return getDataLayout()->getABITypeAlignment(MergedTy); + } + /// Returns true if a cast between SrcAS and DestAS is a noop. virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { return false; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 37a2c3220cb..0c4865f2f27 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -946,8 +946,11 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, - MCSymbolRefExpr::Create(Target, OutContext)); + const MCExpr *Expr = MCSymbolRefExpr::Create(Target, OutContext); + if (uint64_t Offset = Alias.calculateOffset(*TM.getDataLayout())) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(Offset, OutContext), OutContext); + OutStreamer.EmitAssignment(Name, Expr); } } diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 0ec54fe3c08..d64046a7bf9 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" @@ -282,3 +283,27 @@ GlobalObject *GlobalAlias::getAliasedGlobal() { return cast(GV); } } + +uint64_t GlobalAlias::calculateOffset(const DataLayout &DL) const { + uint64_t Offset = 0; + const Constant *C = this; + while (C) { + if (const GlobalAlias *GA = dyn_cast(C)) { + C = GA->getAliasee(); + } else if (const ConstantExpr *CE = dyn_cast(C)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + std::vector Args; + for (unsigned I = 1; I < CE->getNumOperands(); ++I) + Args.push_back(CE->getOperand(I)); + Offset += DL.getIndexedOffset(CE->getOperand(0)->getType(), 
Args); + } + C = CE->getOperand(0); + } else if (isa(C)) { + return Offset; + } else { + assert(0 && "Unexpected type in alias chain!"); + return 0; + } + } + return Offset; +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 852d324476a..a676600e5df 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5560,3 +5560,17 @@ unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { return 4095; } +/// getGlobalMergeAlignment - Set alignment to be the max size of merged +/// global variable data structure, and make it aligned up to power of 2. +/// This way, we could guarantee the merged global variable data structure +/// doesn't cross page boundary, because usually OS always allocates page at +/// 4096-byte aligned boundary. +unsigned AArch64TargetLowering::getGlobalMergeAlignment( + StructType *MergedTy) const { + unsigned Align = getDataLayout()->getTypeAllocSize(MergedTy); + if (Align & (Align - 1)) + Align = llvm::NextPowerOf2(Align); + + return Align; +} + diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 070db94808f..9818b7a2dcb 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -386,6 +386,10 @@ public: /// be used for loads / stores from the global. unsigned getMaximalGlobalOffset() const override; + /// getGlobalMergeAlignment - Set alignment to be the max size of merged + /// global variable data structure, and make it aligned up to power of 2. 
+ unsigned getGlobalMergeAlignment(StructType *MergedTy) const override; + protected: std::pair findRepresentativeClass(MVT VT) const override; diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 118007fa70c..880df0c1a81 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -630,6 +630,20 @@ unsigned ARM64TargetLowering::getMaximalGlobalOffset() const { return 4095; } +/// getGlobalMergeAlignment - Set alignment to be the max size of merged +/// global variable data structure, and make it aligned up to power of 2. +/// This way, we could guarantee the merged global variable data structure +/// doesn't cross page boundary, because usually OS always allocates page at +/// 4096-byte aligned boundary. +unsigned ARM64TargetLowering::getGlobalMergeAlignment( + StructType *MergedTy) const { + unsigned Align = getDataLayout()->getTypeAllocSize(MergedTy); + if (Align & (Align - 1)) + Align = llvm::NextPowerOf2(Align); + + return Align; +} + FastISel * ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/ARM64/ARM64ISelLowering.h index 55792317dba..00b2710a35c 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.h +++ b/lib/Target/ARM64/ARM64ISelLowering.h @@ -236,6 +236,10 @@ public: /// be used for loads / stores from the global. unsigned getMaximalGlobalOffset() const override; + /// getGlobalMergeAlignment - Set alignment to be the max size of merged + /// global variable data structure, and make it aligned up to power of 2. + unsigned getGlobalMergeAlignment(StructType *MergedTy) const override; + /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. 
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index dd9c3784cc2..98061f5b9a8 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -72,7 +72,7 @@ using namespace llvm; #define DEBUG_TYPE "global-merge" static cl::opt -EnableGlobalMerge("global-merge", cl::Hidden, +EnableGlobalMerge("enable-global-merge", cl::NotHidden, cl::desc("Enable global merge pass"), cl::init(true)); @@ -81,6 +81,16 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false)); +static cl::opt +EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, + cl::desc("Enable global merge pass on external linkage"), + cl::init(false)); + +static cl::opt +EnableGlobalMergeAligned("global-merge-aligned", cl::Hidden, + cl::desc("Set target specific alignment for global merge pass"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -129,9 +139,21 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_PASS(GlobalMerge, "global-merge", - "Global Merge", false, false) +static void *initializeGlobalMergePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo( + "Merge global variables", + "global-merge", &GlobalMerge::ID, + PassInfo::NormalCtor_t(callDefaultCtor), false, + false, PassInfo::TargetMachineCtor_t( + callTargetMachineCtor)); + Registry.registerPass(*PI, true); + return PI; +} + +void llvm::initializeGlobalMergePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializeGlobalMergePassOnce) +} bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { @@ -154,11 +176,16 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Type *Int32Ty = Type::getInt32Ty(M.getContext()); + assert (Globals.size() > 1); + for (size_t i = 0, e = Globals.size(); i != e; ) { 
size_t j = 0; uint64_t MergedSize = 0; std::vector Tys; std::vector Inits; + + bool HasExternal = false; + GlobalVariable *TheFirstExternal = nullptr; for (j = i; j != e; ++j) { Type *Ty = Globals[j]->getType()->getElementType(); MergedSize += DL->getTypeAllocSize(Ty); @@ -167,17 +194,45 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + + if (Globals[j]->hasExternalLinkage() && !HasExternal) { + HasExternal = true; + TheFirstExternal = Globals[j]; + } } + // If none of the merged variables has external linkage, the merged symbol + // does not need to be exposed after merging. + GlobalValue::LinkageTypes Linkage = HasExternal ? + GlobalValue::ExternalLinkage : + GlobalValue::InternalLinkage ; + + // If any merged variable has external linkage, use the name of the first + // such variable as the suffix of the merged symbol's name. This avoids + // link-time naming conflicts between merged global symbols. + // The name is materialized as an owned std::string: a Twine only points at + // its operands, and the StringRef temporary returned by getName() does not + // outlive this statement, so a stored Twine would dangle when used below. + std::string MergedGVName = HasExternal ? 
+ ("_MergedGlobals_" + TheFirstExternal->getName()).str() : + std::string("_MergedGlobals") ; + StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); + GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, - GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals", - nullptr, - GlobalVariable::NotThreadLocal, - AddrSpace); + Linkage, MergedInit, MergedGVName, + nullptr, GlobalVariable::NotThreadLocal, + AddrSpace); + + if (EnableGlobalMergeAligned) { + unsigned Align = TLI->getGlobalMergeAlignment(MergedTy); + assert(((Align % DL->getABITypeAlignment(MergedTy)) == 0) && + "Specified alignment doesn't meet natural alignment requirement."); + MergedGV->setAlignment(Align); + } + for (size_t k = i; k < j; ++k) { + GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); + std::string Name = Globals[k]->getName(); + Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) @@ -185,6 +240,12 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); + + if (Linkage != GlobalValue::InternalLinkage) { + // Generate a new alias... + new GlobalAlias(GEP->getType(), Linkage, Name, GEP, &M); + } + NumMerged++; } i = j; @@ -245,8 +306,12 @@ bool GlobalMerge::doInitialization(Module &M) { // Grab all non-const globals. 
for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - // Merge is safe for "normal" internal globals only - if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) + // Merge is safe for "normal" internal or external globals only + if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + continue; + + if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) + && !I->hasInternalLinkage()) continue; PointerType *PT = dyn_cast(I->getType()); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index f8f828c8405..2cca7254806 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -38,6 +38,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDSEPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); + initializeGlobalMergePass(Registry); initializeIndVarSimplifyPass(Registry); initializeJumpThreadingPass(Registry); initializeLICMPass(Registry); diff --git a/test/CodeGen/AArch64/global-merge.ll b/test/CodeGen/AArch64/global-merge.ll new file mode 100644 index 00000000000..101a5b778ed --- /dev/null +++ b/test/CodeGen/AArch64/global-merge.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true -global-merge-aligned=true | FileCheck --check-prefix=NO-MERGE %s + +; RUN: llc < %s -mtriple=arm64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE +; RUN: llc < %s -mtriple=arm64-apple-ios -O0 -global-merge-on-external=true -global-merge-aligned=false | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE + +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O1 -global-merge-on-external=true -global-merge-aligned=true | FileCheck %s + +; RUN: llc < %s -mtriple=arm64-apple-ios -O1 | FileCheck %s 
--check-prefix=CHECK-APPLE-IOS +; RUN: llc < %s -mtriple=arm64-apple-ios -O1 -global-merge-on-external=true -global-merge-aligned=false | FileCheck %s --check-prefix=CHECK-APPLE-IOS + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: f1: +; CHECK: adrp x{{[0-9]+}}, _MergedGlobals +; CHECK-NOT: adrp + +; CHECK-APPLE-IOS-LABEL: f1: +; CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals +; CHECK-APPLE-IOS-NOT: adrp + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} + +; CHECK: .local _MergedGlobals +; CHECK: .comm _MergedGlobals,8,8 +; NO-MERGE-NOT: .local _MergedGlobals + +; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 +; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3 diff --git a/test/CodeGen/AArch64/global_merge_1.ll b/test/CodeGen/AArch64/global_merge_1.ll deleted file mode 100644 index e0587d6b904..00000000000 --- a/test/CodeGen/AArch64/global_merge_1.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s - -@m = internal global i32 0, align 4 -@n = internal global i32 0, align 4 - -define void @f1(i32 %a1, i32 %a2) { -; CHECK-LABEL: f1: -; CHECK: adrp x{{[0-9]+}}, _MergedGlobals -; CHECK-NOT: adrp - store i32 %a1, i32* @m, align 4 - store i32 %a2, i32* @n, align 4 - ret void -} - -; CHECK: .local _MergedGlobals -; CHECK: .comm _MergedGlobals,8,8 - diff --git a/test/CodeGen/ARM/global-merge-1.ll b/test/CodeGen/ARM/global-merge-1.ll new file mode 100644 index 00000000000..341597e6188 --- /dev/null +++ b/test/CodeGen/ARM/global-merge-1.ll @@ -0,0 +1,85 @@ +; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s +; RUN: llc %s -O1 -o - 
-enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s + +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 +; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 + +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios3.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 +@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1 + %6 = tail call 
i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1 + %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +declare i32 @calc(...) #1 + +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4 + %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4 + %3 = mul <4 x i32> %2, %1 + store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #2 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0) +} + +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" 
"no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"LLVM version 3.4 "} +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM64/global-merge.ll b/test/CodeGen/ARM64/global-merge.ll new file mode 100644 index 00000000000..4715fd8de23 --- /dev/null +++ b/test/CodeGen/ARM64/global-merge.ll @@ -0,0 +1,88 @@ +; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s + +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 +; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 + +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 +; NO-MERGE-NOT: .zerofill 
__DATA,__bss,__MergedGlobals,60,4 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 +@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + ret void +} + +declare i32 
@calc(...) + +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = mul nsw i32 %2, %1 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %9 = mul nsw i32 %8, %7 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4 + %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %12 = mul nsw i32 %11, %10 + store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + %15 = mul nsw i32 %14, %13 + store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #1 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0) +} + +attributes #0 = { nounwind ssp } +attributes #1 = { nounwind readnone ssp } +attributes #2 = { nounwind } diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll new file mode 100644 index 00000000000..346f176ec71 --- /dev/null +++ b/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll @@ -0,0 +1,22 
@@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s + +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s + +; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +; CHECK: @_MergedGlobals = internal global { i32, i32 } zeroinitializer + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @f1 +; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 0) +; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 1) + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll new file mode 100644 index 00000000000..0445b237f5e --- /dev/null +++ b/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll @@ -0,0 +1,30 @@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s + +@x = global i32 0, align 4 +@y = global i32 0, align 4 +@z = global i32 0, align 4 + +; CHECK: @_MergedGlobals_x = global { i32, i32, i32 } zeroinitializer, align 16 +; CHECK: @x = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0) 
+; CHECK: @y = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: @z = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2) + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @f1 +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0) +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) + store i32 %a1, i32* @x, align 4 + store i32 %a2, i32* @y, align 4 + ret void +} + +define void @g1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @g1 +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2) + store i32 %a1, i32* @y, align 4 + store i32 %a2, i32* @z, align 4 + ret void +} diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll new file mode 100644 index 00000000000..05ab1ac836d --- /dev/null +++ b/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s + +@x = global [1000 x i32] zeroinitializer, align 1 +@y = global [1000 x i32] zeroinitializer, align 1 +@z = internal global i32 1, align 4 + +; CHECK: @_MergedGlobals_x = global { i32, [1000 x i32] } { i32 1, [1000 x i32] zeroinitializer }, align 4096 +; CHECK: @_MergedGlobals_y = global { [1000 x i32] } zeroinitializer, align 4096 + +; CHECK: @x = alias getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: @y = alias getelementptr inbounds ({ [1000 x i32] }* @_MergedGlobals_y, i32 0, 
i32 0) + +define void @f1(i32 %a1, i32 %a2, i32 %a3) { +; CHECK-LABEL: @f1 +; CHECK: %x3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1), i32 0, i64 3 +; CHECK: %y3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ [1000 x i32] }* @_MergedGlobals_y, i32 0, i32 0), i32 0, i64 3 +; CHECK: store i32 %a3, i32* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 0), align 4 + + %x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3 + %y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3 + store i32 %a1, i32* %x3, align 4 + store i32 %a2, i32* %y3, align 4 + store i32 %a3, i32* @z, align 4 + ret void +} diff --git a/test/Transforms/GlobalMerge/AArch64/lit.local.cfg b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg new file mode 100644 index 00000000000..9a66a00189e --- /dev/null +++ b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'AArch64' in targets: + config.unsupported = True + diff --git a/test/Transforms/GlobalMerge/ARM/arm.ll b/test/Transforms/GlobalMerge/ARM/arm.ll index 8c77de62ece..45062af1177 100644 --- a/test/Transforms/GlobalMerge/ARM/arm.ll +++ b/test/Transforms/GlobalMerge/ARM/arm.ll @@ -1,23 +1,4 @@ -; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s -; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s - -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 -; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; MERGE-NOT: .zerofill 
__DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 - -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; RUN: opt %s -mtriple=arm-linux-gnuabi -global-merge -S -o - | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios3.0.0" @@ -26,6 +7,8 @@ target triple = "thumbv7-apple-ios3.0.0" @baz = internal global [5 x i32] zeroinitializer, align 4 @foo = internal global [5 x i32] zeroinitializer, align 4 +; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer + ; Function Attrs: nounwind ssp define internal void @initialize() #0 { %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 diff --git a/test/Transforms/GlobalMerge/ARM64/arm64.ll b/test/Transforms/GlobalMerge/ARM64/arm64.ll index eea474a74f1..ebc362ea97d 100644 --- a/test/Transforms/GlobalMerge/ARM64/arm64.ll +++ b/test/Transforms/GlobalMerge/ARM64/arm64.ll @@ -1,23 +1,6 @@ -; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s -; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -S -o - | FileCheck %s -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 -; MERGE: .zerofill 
__DATA,__bss,__MergedGlobals,60,4 -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 - -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios7.0.0"