#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/DataLayout.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumThreads, "Number of jumps threaded");
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- TargetData *TD;
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
///
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
- } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // Can't thread an unconditional jump, but if the block is "almost
- // empty", we can replace uses of it with uses of the successor and make
- // this dead.
- if (BI->isUnconditional() &&
- BB != &BB->getParent()->getEntryBlock()) {
- BasicBlock::iterator BBI = BB->getFirstNonPHI();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- if (BBI->isTerminator()) {
- // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
- // block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward if needed.
- bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // FIXME: It is always conservatively correct to drop the info
- // for a block even if it doesn't get erased. This isn't totally
- // awesome, but it allows us to use AssertingVH to prevent nasty
- // dangling pointer issues within LazyValueInfo.
- LVI->eraseBlock(BB);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
- Changed = true;
- // If we deleted BB and BB was the header of a loop, then the
- // successor is now the header of the loop.
- BB = Succ;
- }
-
- if (ErasedFromLoopHeaders)
- LoopHeaders.insert(BB);
- }
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
}
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
}
}
EverChanged |= Changed;
}
/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+/// thread across it. Stop scanning the block when passing the threshold.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+ unsigned Threshold) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
-
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; !isa<TerminatorInst>(I); ++I) {
+
+ // Stop scanning the block if we've reached the threshold.
+ if (Size > Threshold)
+ return Size;
+
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
}
}
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+
// If all else fails, see if LVI can figure out a constant value for us.
Constant *CI = LVI->getConstant(V, BB);
if (Constant *KC = getKnownConstant(CI, Preference)) {
for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
TestBB = BBTerm->getSuccessor(i);
unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
- if (NumPreds < MinNumPreds)
+ if (NumPreds < MinNumPreds) {
MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
}
return MinSucc;
}
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;
+
+ // If the block has its address taken, it may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();
+}
+
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
- SinglePred != BB) {
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
Condition = SI->getCondition();
} else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ // Can't thread indirect branch with no successors.
+ if (IB->getNumSuccessors() == 0) return false;
Condition = IB->getAddress()->stripPointerCasts();
Preference = WantBlockAddress;
} else {
return false; // Must be an invoke.
}
+ // Run constant folding to see if we can reduce the condition to a simple
+ // constant.
+ if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
+ if (SimpleVal) {
+ I->replaceAllUsesWith(SimpleVal);
+ I->eraseFromParent();
+ Condition = SimpleVal;
+ }
+ }
+
// If the terminator is branching on an undef, we can pick any of the
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
if (isa<UndefValue>(Condition)) {
DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding terminator: " << *BB->getTerminator() << '\n');
++NumFolds;
- ConstantFoldTerminator(BB);
+ ConstantFoldTerminator(BB, true);
return true;
}
/// important optimization that encourages jump threading, and needs to be run
/// interlaced with other jump threading tasks.
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
- // Don't hack volatile loads.
- if (LI->isVolatile()) return false;
+ // Don't hack volatile/atomic loads.
+ if (!LI->isSimple()) return false;
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
if (BBIt != LoadBB->begin())
return false;
+ // If all of the loads and stores that feed the value have the same TBAA tag,
+ // then we can propagate it onto any newly inserted loads.
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ MDNode *ThisTBAATag = 0;
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ 0, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
+ // If tbaa tags disagree or are not present, forget about them.
+ if (TBAATag != ThisTBAATag) TBAATag = 0;
+
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
+ NewVal->setDebugLoc(LI->getDebugLoc());
+ if (TBAATag)
+ NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
// Create a PHI node at the start of the block for the PRE'd load value.
- PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
+ pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+ LoadBB->begin());
PN->takeName(LI);
+ PN->setDebugLoc(LI->getDebugLoc());
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
- for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
- ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
}
}
- // Okay, now we know the most popular destination. If there is more than
+ // Okay, now we know the most popular destination. If there is more than one
// destination, we need to determine one. This is arbitrary, but we need
// to make a deterministic decision. Pick the first one that appears in the
// successor list.
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
- else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
- DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
- else {
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
+ } else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
DestBB = cast<BlockAddress>(Val)->getBasicBlock();
}
// If we have exactly one destination, remember it for efficiency below.
- if (i == 0)
+ if (PredToDestList.empty())
OnlyDest = DestBB;
else if (OnlyDest != DestBB)
OnlyDest = MultipleDestSentinel;
return false;
}
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
if (JumpThreadCost > Threshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
- BranchInst::Create(SuccBB, NewBB);
+ BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, TD);
+ SimplifyInstructionsInBlock(NewBB, TD, TLI);
// Threaded an edge!
++NumThreads;
return false;
}
- unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
if (DuplicationCost > Threshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end