// evaluated each time through the tail recursion. Safely keeping allocas
// in the entry block requires analysis to proves that the tail-called
// function does not read or write the stack object.
-// 2. Tail recursion is only performed if the call immediately preceeds the
+// 2. Tail recursion is only performed if the call immediately precedes the
// return instruction. It's possible that there could be a jump between
// the call and the return.
// 3. There can be intervening operations between the call and the return that
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
FunctionContainsEscapingAllocas |=
CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
}
-
+
/// FIXME: The code generator produces really bad code when an 'escaping
/// alloca' is changed from being a static alloca to being a dynamic alloca.
/// Until this is resolved, disable this transformation if that would ever
}
}
- // Finally, if this function contains no non-escaping allocas, mark all calls
- // in the function as eligible for tail calls (there is no stack memory for
- // them to access).
- if (!FunctionContainsEscapingAllocas)
+ // Finally, if this function contains no non-escaping allocas, or calls
+ // setjmp, mark all calls in the function as eligible for tail calls
+ //(there is no stack memory for them to access).
+ if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (CallInst *CI = dyn_cast<CallInst>(I)) {
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
-
+
if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
if (&BB->front() == TI) // Make sure there is something before the terminator.
return 0;
-
+
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
CallInst *CI = 0;
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
- if (BB == &F->getEntryBlock() &&
+ if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
FirstNonDbg(llvm::next(BB->begin())) == TI &&
- callIsSmall(F)) {
+ callIsSmall(CI)) {
// A single-block function with just a call and a return. Check that
// the arguments match.
CallSite::arg_iterator I = CallSite(CI).arg_begin(),
BasicBlock::iterator BBI = CI;
for (++BBI; &*BBI != Ret; ++BBI) {
if (CanMoveAboveCall(BBI, CI)) continue;
-
+
// If we can't move the instruction above the call, it might be because it
- // is an associative and commutative operation that could be tranformed
+ // is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
// case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
Instruction *InsertPos = OldEntry->begin();
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I) {
- PHINode *PN = PHINode::Create(I->getType(),
+ PHINode *PN = PHINode::Create(I->getType(), 2,
I->getName() + ".tr", InsertPos);
I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
PN->addIncoming(I, NewEntry);
if (AccumulatorRecursionEliminationInitVal) {
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
+ pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
PHINode *AccPN =
PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1,
"accumulator.tr", OldEntry->begin());
// Loop over all of the predecessors of the tail recursion block. For the
// other tail recursions eliminated) the accumulator is not modified.
// Because we haven't added the branch in the current block to OldEntry yet,
// it will not show up as a predecessor.
- for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
- PI != PE; ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
if (P == &F->getEntryBlock())
AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
// Now that all of the PHI nodes are in place, remove the call and
// ret instructions, replacing them with an unconditional branch.
- BranchInst::Create(OldEntry, Ret);
+ BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
+ NewBI->setDebugLoc(CI->getDebugLoc());
+
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
++NumEliminated;
// there might be an opportunity to duplicate the return in its
// predecessors and perform TRC there. Look for predecessors that end
// in unconditional branch and recursive call(s).
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- PI != E; ++PI) {
+ SmallVector<BranchInst*, 8> UncondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *Pred = *PI;
TerminatorInst *PTI = Pred->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
- CallInst *CI = 0;
- if (BI->isUnconditional() &&
- (CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail))) {
- DEBUG(dbgs() << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred);
- EliminateRecursiveTailCall(CI,
- FoldReturnIntoUncondBranch(Ret, BB, Pred),
- OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
- CannotTailCallElimCallsMarkedTail);
- Change = true;
- }
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(BI);
+ }
+
+ while (!UncondBranchPreds.empty()) {
+ BranchInst *BI = UncondBranchPreds.pop_back_val();
+ BasicBlock *Pred = BI->getParent();
+ if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+ OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+ ++NumRetDuped;
+ Change = true;
}
}