From: Chris Lattner Date: Thu, 6 Oct 2005 18:45:51 +0000 (+0000) Subject: Add a recursive-iterative hybrid stage to attempt to reduce stack space, this X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=bd937b98f48cac17bd3d5102b2182943b97f8c28;p=oota-llvm.git Add a recursive-iterative hybrid stage to attempt to reduce stack space, this helps but not enough. Start pulling cases out of PPC32DAGToDAGISel::Select. With GCC 4, this function required 8512 bytes of stack space for each invocation (GCC 3 required less than 700 bytes). Pulling this first function out gets us down to 8224. More to come :( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23647 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 14c74063cc0..8937784fb92 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -83,26 +83,70 @@ namespace { /// InstructionSelectBasicBlock - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelectBasicBlock(SelectionDAG &DAG) { - DEBUG(BB->dump()); - // Select target instructions for the DAG. - DAG.setRoot(Select(DAG.getRoot())); - CodeGenMap.clear(); - DAG.RemoveDeadNodes(); - - // Emit machine code to BB. - ScheduleAndEmitDAG(DAG); - } - + virtual void InstructionSelectBasicBlock(SelectionDAG &DAG); + virtual const char *getPassName() const { return "PowerPC DAG->DAG Pattern Instruction Selection"; } // Include the pieces autogenerated from the target description. #include "PPC32GenDAGISel.inc" + +private: + SDOperand SelectDYNAMIC_STACKALLOC(SDOperand N); }; } +/// InstructionSelectBasicBlock - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void PPC32DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { + DEBUG(BB->dump()); + + // The selection process is inherently a bottom-up recursive process (users + // select their uses before themselves). Given infinite stack space, we + // could just start selecting on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, select the entry node, then all its uses, + // iteratively instead of recursively. + std::vector Worklist; + Worklist.push_back(DAG.getEntryNode()); + + // Note that we can do this in the PPC target (scanning forward across token + // chain edges) because no nodes ever get folded across these edges. On a + // target like X86 which supports load/modify/store operations, this would + // have to be more careful. + while (!Worklist.empty()) { + SDOperand Node = Worklist.back(); + Worklist.pop_back(); + + if ((Node.Val->getOpcode() >= ISD::BUILTIN_OP_END && + Node.Val->getOpcode() < PPCISD::FIRST_NUMBER) || + CodeGenMap.count(Node)) continue; + + for (SDNode::use_iterator UI = Node.Val->use_begin(), + E = Node.Val->use_end(); UI != E; ++UI) { + // Scan the values. If this use has a value that is a token chain, add it + // to the worklist. + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumValues(); i != e; ++i) + if (User->getValueType(i) == MVT::Other) { + Worklist.push_back(SDOperand(User, i)); + break; + } + } + + // Finally, legalize this node. + Select(Node); + } + + // Select target instructions for the DAG. + DAG.setRoot(Select(DAG.getRoot())); + CodeGenMap.clear(); + DAG.RemoveDeadNodes(); + + // Emit machine code to BB. + ScheduleAndEmitDAG(DAG); +} /// getGlobalBaseReg - Output the instructions required to put the /// base address to use for accessing globals into a register. @@ -632,6 +676,43 @@ SDOperand PPC32DAGToDAGISel::BuildUDIVSequence(SDNode *N) { } } +SDOperand PPC32DAGToDAGISel::SelectDYNAMIC_STACKALLOC(SDOperand Op) { + SDNode *N = Op.Val; + + // FIXME: We are currently ignoring the requested alignment for handling + // greater than the stack alignment. This will need to be revisited at some + // point. Align = N.getOperand(2); + if (!isa(N->getOperand(2)) || + cast(N->getOperand(2))->getValue() != 0) { + std::cerr << "Cannot allocate stack object with greater alignment than" + << " the stack alignment yet!"; + abort(); + } + SDOperand Chain = Select(N->getOperand(0)); + SDOperand Amt = Select(N->getOperand(1)); + + SDOperand R1Reg = CurDAG->getRegister(PPC::R1, MVT::i32); + + SDOperand R1Val = CurDAG->getCopyFromReg(Chain, PPC::R1, MVT::i32); + Chain = R1Val.getValue(1); + + // Subtract the amount (guaranteed to be a multiple of the stack alignment) + // from the stack pointer, giving us the result pointer. + SDOperand Result = CurDAG->getTargetNode(PPC::SUBF, MVT::i32, Amt, R1Val); + + // Copy this result back into R1. + Chain = CurDAG->getNode(ISD::CopyToReg, MVT::Other, Chain, R1Reg, Result); + + // Copy this result back out of R1 to make sure we're not using the stack + // space without decrementing the stack pointer. + Result = CurDAG->getCopyFromReg(Chain, PPC::R1, MVT::i32); + + // Finally, replace the DYNAMIC_STACKALLOC with the copyfromreg. + CodeGenMap[Op.getValue(0)] = Result; + CodeGenMap[Op.getValue(1)] = Result.getValue(1); + return SDOperand(Result.Val, Op.ResNo); +} + // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. SDOperand PPC32DAGToDAGISel::Select(SDOperand Op) { @@ -718,40 +799,8 @@ SDOperand PPC32DAGToDAGISel::Select(SDOperand Op) { CurDAG->SelectNodeTo(N, PPC::LA, MVT::i32, Tmp, GA); return SDOperand(N, 0); } - case ISD::DYNAMIC_STACKALLOC: { - // FIXME: We are currently ignoring the requested alignment for handling - // greater than the stack alignment. This will need to be revisited at some - // point. Align = N.getOperand(2); - if (!isa(N->getOperand(2)) || - cast(N->getOperand(2))->getValue() != 0) { - std::cerr << "Cannot allocate stack object with greater alignment than" - << " the stack alignment yet!"; - abort(); - } - SDOperand Chain = Select(N->getOperand(0)); - SDOperand Amt = Select(N->getOperand(1)); - - SDOperand R1Reg = CurDAG->getRegister(PPC::R1, MVT::i32); - - SDOperand R1Val = CurDAG->getCopyFromReg(Chain, PPC::R1, MVT::i32); - Chain = R1Val.getValue(1); - - // Subtract the amount (guaranteed to be a multiple of the stack alignment) - // from the stack pointer, giving us the result pointer. - SDOperand Result = CurDAG->getTargetNode(PPC::SUBF, MVT::i32, Amt, R1Val); - - // Copy this result back into R1. - Chain = CurDAG->getNode(ISD::CopyToReg, MVT::Other, Chain, R1Reg, Result); - - // Copy this result back out of R1 to make sure we're not using the stack - // space without decrementing the stack pointer. - Result = CurDAG->getCopyFromReg(Chain, PPC::R1, MVT::i32); - - // Finally, replace the DYNAMIC_STACKALLOC with the copyfromreg. - CodeGenMap[Op.getValue(0)] = Result; - CodeGenMap[Op.getValue(1)] = Result.getValue(1); - return SDOperand(Result.Val, Op.ResNo); - } + case ISD::DYNAMIC_STACKALLOC: + return SelectDYNAMIC_STACKALLOC(Op); case PPCISD::FSEL: { SDOperand Comparison = Select(N->getOperand(0)); // Extend the comparison to 64-bits.