From: Chris Lattner Date: Mon, 17 Jan 2005 17:55:19 +0000 (+0000) Subject: Implement a target independent optimization to codegen arguments only into X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0afa8e348eab21d3e09ae3240544886d61879266;p=oota-llvm.git Implement a target independent optimization to codegen arguments only into the basic block that uses them if possible. This is a big win on X86, as it lets us fold the argument loads into instructions and reduce register pressure (by not loading all of the arguments in the entry block). For this (contrived to show the optimization) testcase: int %argtest(int %A, int %B) { %X = sub int 12345, %A br label %L L: %Y = add int %X, %B ret int %Y } we used to produce: argtest: mov %ECX, DWORD PTR [%ESP + 4] mov %EAX, 12345 sub %EAX, %ECX mov %EDX, DWORD PTR [%ESP + 8] .LBBargtest_1: # L add %EAX, %EDX ret now we produce: argtest: mov %EAX, 12345 sub %EAX, DWORD PTR [%ESP + 4] .LBBargtest_1: # L add %EAX, DWORD PTR [%ESP + 8] ret This also fixes the FIXME in the code. BTW, this occurs in real code. 164.gzip shrinks from 8623 to 8608 lines of .s file. The stack frame in huft_build shrinks from 1644->1628 bytes, inflate_codes shrinks from 116->108 bytes, and inflate_block from 2620->2612, due to fewer spills. Take that alkis. :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@19639 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 64940aa6b6c..d13030f847a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -68,6 +68,14 @@ namespace llvm { /// anywhere in the function. std::map StaticAllocaMap; + /// BlockLocalArguments - If any arguments are only used in a single basic + /// block, and if the target can access the arguments without side-effects, + /// avoid emitting CopyToReg nodes for those arguments. 
This map keeps + /// track of which arguments are local to each BB. + std::multimap<BasicBlock*, std::pair<Argument*, unsigned> > BlockLocalArguments; + + unsigned MakeReg(MVT::ValueType VT) { return RegMap->createVirtualRegister(TLI.getRegClassFor(VT)); } @@ -806,28 +814,77 @@ CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) { return DAG.getCopyToReg(DAG.getRoot(), Op, Reg); } +/// IsOnlyUsedInOneBasicBlock - If the specified argument is only used in a +/// single basic block, return that block. Otherwise, return a null pointer. +static BasicBlock *IsOnlyUsedInOneBasicBlock(Argument *A) { + if (A->use_empty()) return 0; + BasicBlock *BB = cast<Instruction>(A->use_back())->getParent(); + for (Argument::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; + ++UI) + if (isa<PHINode>(*UI) || cast<Instruction>(*UI)->getParent() != BB) + return 0; // Disagreement among the users? + return BB; +} + void SelectionDAGISel:: LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL, std::vector<SDOperand> &UnorderedChains) { // If this is the entry block, emit arguments. Function &F = *BB->getParent(); + FunctionLoweringInfo &FuncInfo = SDL.FuncInfo; if (BB == &F.front()) { - // FIXME: If an argument is only used in one basic block, we could directly - // emit it (ONLY) into that block, not emitting the COPY_TO_VREG node. This - // would improve codegen in several cases on X86 by allowing the loads to be - // folded into the user operation. + SDOperand OldRoot = SDL.DAG.getRoot(); + std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG); - FunctionLoweringInfo &FuncInfo = SDL.FuncInfo; + // If there were side effects accessing the argument list, do not do + // anything special.
+ if (OldRoot != SDL.DAG.getRoot()) { + unsigned a = 0; + for (Function::aiterator AI = F.abegin(), E = F.aend(); AI != E; ++AI,++a) + if (!AI->use_empty()) { + SDL.setValue(AI, Args[a]); + SDOperand Copy = + CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]); + UnorderedChains.push_back(Copy); + } + } else { + // Otherwise, if any argument is only accessed in a single basic block, + // emit that argument only to that basic block. + unsigned a = 0; + for (Function::aiterator AI = F.abegin(), E = F.aend(); AI != E; ++AI,++a) + if (!AI->use_empty()) { + if (BasicBlock *BBU = IsOnlyUsedInOneBasicBlock(AI)) { + FuncInfo.BlockLocalArguments.insert(std::make_pair(BBU, + std::make_pair(AI, a))); + } else { + SDL.setValue(AI, Args[a]); + SDOperand Copy = + CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]); + UnorderedChains.push_back(Copy); + } + } + } + } - unsigned a = 0; - for (Function::aiterator AI = F.abegin(), E = F.aend(); AI != E; ++AI,++a) - if (!AI->use_empty()) { - SDL.setValue(AI, Args[a]); - UnorderedChains.push_back( - CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI])); - } + // See if there are any block-local arguments that need to be emitted in this + // block. + + if (!FuncInfo.BlockLocalArguments.empty()) { + std::multimap<BasicBlock*, std::pair<Argument*, unsigned> >::iterator BLAI = + FuncInfo.BlockLocalArguments.lower_bound(BB); + if (BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB) { + // Lower the arguments into this block. + std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG); + + // Set up the value mapping for the local arguments. + for (; BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB; + ++BLAI) + SDL.setValue(BLAI->second.first, Args[BLAI->second.second]); + + // Any dead arguments will just be ignored here. + } } }