From 2085d00d09f4f3678a6c67da46df3a04e31b499c Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 16 Nov 2012 21:03:51 +0000 Subject: [PATCH] [NVPTX] Order global variables in def-use order before emiting them in the final assembly git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168198 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 70 +++++++++++++++++++++++++-- test/CodeGen/NVPTX/global-ordering.ll | 20 ++++++++ 2 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/NVPTX/global-ordering.ll diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 1cea9fac359..31ab68158ca 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,7 +68,54 @@ static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", cl::location(llvm::InterleaveSrcInPtx)); +namespace { +/// DiscoverDependentGlobals - Collect into \p Globals the GlobalVariables on +/// which \p V depends. +void DiscoverDependentGlobals(Value *V, + DenseSet<GlobalVariable*> &Globals) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + Globals.insert(GV); + else { + if (User *U = dyn_cast<User>(V)) { + for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { + DiscoverDependentGlobals(U->getOperand(i), Globals); + } + } + } +} +/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable +/// instances to be emitted, but only after any dependencies have been added +/// first. +void VisitGlobalVariableForEmission(GlobalVariable *GV, + SmallVectorImpl<GlobalVariable*> &Order, + DenseSet<GlobalVariable*> &Visited, + DenseSet<GlobalVariable*> &Visiting) { + // Have we already visited this one? + if (Visited.count(GV)) return; + + // Do we have a circular dependency? 
+ if (Visiting.count(GV)) + report_fatal_error("Circular dependency found in global variable set"); + + // Start visiting this global + Visiting.insert(GV); + + // Make sure we visit all dependencies first + DenseSet<GlobalVariable*> Others; + for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) + DiscoverDependentGlobals(GV->getOperand(i), Others); + + for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) + VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); + + // Now we can visit ourself + Order.push_back(GV); + Visited.insert(GV); + Visiting.erase(GV); +} +} // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we // cannot just link to the existing version. @@ -893,10 +940,27 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitDeclarations(M, OS2); - // Print out module-level global variables here. + // As ptxas does not support forward references of globals, we need to first + // sort the list of module-level globals in def-use order. We visit each + // global variable in order, and ensure that we emit it *after* the globals + // it depends on. We use a little extra memory maintaining both a set and a list to + // have fast searches while maintaining a strict ordering. 
+ SmallVector<GlobalVariable*,8> Globals; + DenseSet<GlobalVariable*> GVVisited; + DenseSet<GlobalVariable*> GVVisiting; + + // Visit each global variable, in order for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I, OS2); + I != E; ++I) + VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + + assert(GVVisited.size() == M.getGlobalList().size() && + "Missed a global variable"); + assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); + + // Print out module-level global variables in proper order + for (unsigned i = 0, e = Globals.size(); i != e; ++i) + printModuleLevelGV(Globals[i], OS2); OS2 << '\n'; diff --git a/test/CodeGen/NVPTX/global-ordering.ll b/test/CodeGen/NVPTX/global-ordering.ll new file mode 100644 index 00000000000..43394a79e91 --- /dev/null +++ b/test/CodeGen/NVPTX/global-ordering.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 + +; Make sure we emit these globals in def-use order + + +; PTX32: .visible .global .align 1 .u8 a = 2; +; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a; +; PTX64: .visible .global .align 1 .u8 a = 2; +; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a; +@a2 = addrspace(1) global i8 addrspace(1)* @a +@a = addrspace(1) global i8 2 + + +; PTX32: .visible .global .align 1 .u8 b = 1; +; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b}; +; PTX64: .visible .global .align 1 .u8 b = 1; +; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b}; +@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b, i8 addrspace(1)* @b] +@b = addrspace(1) global i8 1 -- 2.34.1