From 4e04297ac3868d9a8d0ce986842c3487a5366280 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Tue, 9 Jun 2015 00:05:56 +0000 Subject: [PATCH] [NVPTX] run SROA after NVPTXFavorNonGenericAddrSpaces Summary: This cleans up most allocas NVPTXLowerKernelArgs emits for byval parameters. Test Plan: makes bug21465.ll more stronger to verify no redundant local load/store. Reviewers: eliben, jholewinski Reviewed By: eliben, jholewinski Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D10322 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 5 +++++ test/CodeGen/NVPTX/bug21465.ll | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7fdfdbfb83f..a6466687bc7 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -165,6 +165,11 @@ void NVPTXPassConfig::addIRPasses() { addPass(createGenericToNVVMPass()); addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + // NVPTXLowerKernelArgs emits alloca for byval parameters which can often + // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs + // because we plan to merge NVPTXLowerKernelArgs and + // NVPTXFavorNonGenericAddrSpaces into one pass. + addPass(createSROAPass()); // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll index c375cf8d580..2eae41f73a0 100644 --- a/test/CodeGen/NVPTX/bug21465.ll +++ b/test/CodeGen/NVPTX/bug21465.ll @@ -14,9 +14,10 @@ entry: ; CHECK: addrspacecast %struct.S* %input to %struct.S addrspace(101)* %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1 %0 = load i32, i32* %b, align 4 -; PTX: ld.param.u32 %r{{[0-9]+}}, {{\[}}[[BASE:%rd[0-9]+]]{{\]}} -; PTX-NEXT: ld.param.u32 %r{{[0-9]+}}, {{\[}}[[BASE]]+4{{\]}} +; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}] +; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4] store i32 %0, i32* %output, align 4 +; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]] ret void } -- 2.34.1