#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/DataLayout.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumThreads, "Number of jumps threaded");
typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+ // This is used to keep track of what kind of constant we're currently hoping
+ // to find. It is passed down to getKnownConstant(); undef is accepted either way.
+ enum ConstantPreference {
+ WantInteger,      // Accept a ConstantInt (the default; branch/switch conditions).
+ WantBlockAddress  // Accept a BlockAddress (chosen for indirectbr addresses).
+ };
+
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- TargetData *TD;
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
const SmallVectorImpl<BasicBlock *> &PredBBs);
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
- PredValueInfo &Result);
- bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
-
-
- bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
- bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+ PredValueInfo &Result,
+ ConstantPreference Preference);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference);
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
///
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
- } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // Can't thread an unconditional jump, but if the block is "almost
- // empty", we can replace uses of it with uses of the successor and make
- // this dead.
- if (BI->isUnconditional() &&
- BB != &BB->getParent()->getEntryBlock()) {
- BasicBlock::iterator BBI = BB->getFirstNonPHI();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- if (BBI->isTerminator()) {
- // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
- // block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward if needed.
- bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // FIXME: It is always conservatively correct to drop the info
- // for a block even if it doesn't get erased. This isn't totally
- // awesome, but it allows us to use AssertingVH to prevent nasty
- // dangling pointer issues within LazyValueInfo.
- LVI->eraseBlock(BB);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
- Changed = true;
- // If we deleted BB and BB was the header of a loop, then the
- // successor is now the header of the loop.
- BB = Succ;
- }
-
- if (ErasedFromLoopHeaders)
- LoopHeaders.insert(BB);
- }
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
}
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
}
}
EverChanged |= Changed;
}
/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+/// thread across it. Stop scanning the block when passing the threshold.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+ unsigned Threshold) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
-
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; !isa<TerminatorInst>(I); ++I) {
+
+ // Stop scanning the block if we've reached the threshold.
+ if (Size > Threshold)
+ return Size;
+
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
if (isa<SwitchInst>(I))
Size = Size > 6 ? Size-6 : 0;
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(I))
+ Size = Size > 8 ? Size-8 : 0;
+
return Size;
}
/// getKnownConstant - Helper method to determine if we can thread over a
/// terminator with the given value as its condition, and if so what value to
-/// use for that.
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
/// Returns null if Val is null or not an appropriate constant.
-static Constant *getKnownConstant(Value *Val) {
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
if (!Val)
return 0;
if (UndefValue *U = dyn_cast<UndefValue>(Val))
return U;
- return dyn_cast<ConstantInt>(Val);
-}
+ if (Preference == WantBlockAddress)  // Caller is looking for an indirectbr target.
+ return dyn_cast<BlockAddress>(Val->stripPointerCasts());  // Look through pointer casts first.
-// Helper method for ComputeValueKnownInPredecessors. If Value is a
-// ConstantInt or undef, push it. Otherwise, do nothing.
-static void PushKnownConstantOrUndef(PredValueInfo &Result, Constant *Value,
- BasicBlock *BB) {
- if (Constant *KC = getKnownConstant(Value))
- Result.push_back(std::make_pair(KC, BB));
+ return dyn_cast<ConstantInt>(Val);  // WantInteger: only a ConstantInt qualifies; null otherwise.
}
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
-/// if we can infer that the value is a known ConstantInt in any of our
-/// predecessors. If so, return the known list of value and pred BB in the
-/// result vector. If a value is known to be undef, it is returned as null.
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors. If so, return the known list of value and pred
+/// BB in the result vector.
///
/// This returns true if there were any known values.
///
bool JumpThreading::
-ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
// If V is a constant, then it is known in all predecessors.
- if (Constant *KC = getKnownConstant(V)) {
+ if (Constant *KC = getKnownConstant(V, Preference)) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(KC, *PI));
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
- if (Constant *KC = getKnownConstant(PredCst))
+ if (Constant *KC = getKnownConstant(PredCst, Preference))
Result.push_back(std::make_pair(KC, P));
}
if (PHINode *PN = dyn_cast<PHINode>(I)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
- if (Constant *KC = getKnownConstant(InVal)) {
+ if (Constant *KC = getKnownConstant(InVal, Preference)) {
Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
} else {
Constant *CI = LVI->getConstantOnEdge(InVal,
PN->getIncomingBlock(i), BB);
- // LVI returns null is no value could be determined.
- if (!CI) continue;
- PushKnownConstantOrUndef(Result, CI, PN->getIncomingBlock(i));
+ if (Constant *KC = getKnownConstant(CI, Preference))
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
}
}
// Handle some boolean conditions.
if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ assert(Preference == WantInteger && "One-bit non-integer type?");
// X | true -> true
// X & false -> false
if (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
- ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+ WantInteger);
if (LHSVals.empty() && RHSVals.empty())
return false;
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+ WantInteger);
if (Result.empty())
return false;
// Try to simplify some other binary operator values.
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ assert(Preference != WantBlockAddress
+ && "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+ WantInteger);
// Try to use constant folding to simplify the binary operator.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
Constant *V = LHSVals[i].first;
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
- PushKnownConstantOrUndef(Result, Folded, LHSVals[i].second);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
}
// Handle compare with phi operand, where the PHI is defined in this block.
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ assert(Preference == WantInteger && "Compares only produce integers");
PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
if (PN && PN->getParent() == BB) {
// We can do this simplification if any comparisons fold to true or false.
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
- if (Constant *ConstRes = dyn_cast<Constant>(Res))
- PushKnownConstantOrUndef(Result, ConstRes, PredBB);
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.push_back(std::make_pair(KC, PredBB));
}
return !Result.empty();
// and evaluate it statically if we can.
if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
Constant *V = LHSVals[i].first;
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
- PushKnownConstantOrUndef(Result, Folded, LHSVals[i].second);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
return !Result.empty();
}
}
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+
// If all else fails, see if LVI can figure out a constant value for us.
Constant *CI = LVI->getConstant(V, BB);
- if (Constant *KC = getKnownConstant(CI)) {
+ if (Constant *KC = getKnownConstant(CI, Preference)) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(KC, *PI));
}
for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
TestBB = BBTerm->getSuccessor(i);
unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
- if (NumPreds < MinNumPreds)
+ if (NumPreds < MinNumPreds) {
MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
}
return MinSucc;
}
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;  // Fast path: address never taken.
+
+ // If the block has its address taken, there may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive, so prune them first.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();  // True only if a use of the blockaddress remains after pruning.
+}
+
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
- SinglePred != BB) {
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
}
}
- // Look to see if the terminator is a branch of switch, if not we can't thread
- // it.
+ // What kind of constant we're looking for.
+ ConstantPreference Preference = WantInteger;
+
+ // Look to see if the terminator is a conditional branch, switch or indirect
+ // branch, if not we can't thread it.
Value *Condition;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ Instruction *Terminator = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
// Can't thread an unconditional jump.
if (BI->isUnconditional()) return false;
Condition = BI->getCondition();
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
Condition = SI->getCondition();
- else
+ } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ // Can't thread indirect branch with no successors.
+ if (IB->getNumSuccessors() == 0) return false;
+ Condition = IB->getAddress()->stripPointerCasts();
+ Preference = WantBlockAddress;
+ } else {
return false; // Must be an invoke.
+ }
+
+ // Run constant folding to see if we can reduce the condition to a simple
+ // constant.
+ if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
+ if (SimpleVal) {
+ I->replaceAllUsesWith(SimpleVal);
+ I->eraseFromParent();
+ Condition = SimpleVal;
+ }
+ }
// If the terminator is branching on an undef, we can pick any of the
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
// If the terminator of this block is branching on a constant, simplify the
// terminator to an unconditional branch. This can occur due to threading in
// other blocks.
- if (getKnownConstant(Condition)) {
+ if (getKnownConstant(Condition, Preference)) {
DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding terminator: " << *BB->getTerminator() << '\n');
++NumFolds;
- ConstantFoldTerminator(BB);
+ ConstantFoldTerminator(BB, true);
return true;
}
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0) {
// FIXME: Unify this with code below.
- if (ProcessThreadableEdges(Condition, BB))
+ if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
return false;
}
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
//
- if (ProcessThreadableEdges(CondInst, BB))
+ if (ProcessThreadableEdges(CondInst, BB, Preference))
return true;
// If this is an otherwise-unfoldable branch on a phi node in the current
return false;
}
-/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
-/// block that jump on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// br COND, DESTBB, BBY
-/// DESTBB:
-/// br COND, BBZ, BBW
-///
-/// If DESTBB has multiple predecessors, we can't just constant fold the branch
-/// in DESTBB, we have to thread over it.
-bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *BB) {
- BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
-
- // If both successors of PredBB go to DESTBB, we don't know anything. We can
- // fold the branch to an unconditional one, which allows other recursive
- // simplifications.
- bool BranchDir;
- if (PredBI->getSuccessor(1) != BB)
- BranchDir = true;
- else if (PredBI->getSuccessor(0) != BB)
- BranchDir = false;
- else {
- DEBUG(dbgs() << " In block '" << PredBB->getName()
- << "' folding terminator: " << *PredBB->getTerminator() << '\n');
- ++NumFolds;
- ConstantFoldTerminator(PredBB);
- return true;
- }
-
- BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
-
- // If the dest block has one predecessor, just fix the branch condition to a
- // constant and fold it.
- if (BB->getSinglePredecessor()) {
- DEBUG(dbgs() << " In block '" << BB->getName()
- << "' folding condition to '" << BranchDir << "': "
- << *BB->getTerminator() << '\n');
- ++NumFolds;
- Value *OldCond = DestBI->getCondition();
- DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- BranchDir));
- // Delete dead instructions before we fold the branch. Folding the branch
- // can eliminate edges from the CFG which can end up deleting OldCond.
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- ConstantFoldTerminator(BB);
- return true;
- }
-
-
- // Next, figure out which successor we are threading to.
- BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
-
- SmallVector<BasicBlock*, 2> Preds;
- Preds.push_back(PredBB);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, Preds, SuccBB);
-}
-
-/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
-/// block that switch on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// switch COND [... DESTBB, BBY ... ]
-/// DESTBB:
-/// switch COND [... BBZ, BBW ]
-///
-/// Optimizing switches like this is very important, because simplifycfg builds
-/// switches out of repeated 'if' conditions.
-bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *DestBB) {
- // Can't thread edge to self.
- if (PredBB == DestBB)
- return false;
-
- SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
- SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
-
- // There are a variety of optimizations that we can potentially do on these
- // blocks: we order them from most to least preferable.
-
- // If DESTBB *just* contains the switch, then we can forward edges from PREDBB
- // directly to their destination. This does not introduce *any* code size
- // growth. Skip debug info first.
- BasicBlock::iterator BBI = DestBB->begin();
- while (isa<DbgInfoIntrinsic>(BBI))
- BBI++;
-
- // FIXME: Thread if it just contains a PHI.
- if (isa<SwitchInst>(BBI)) {
- bool MadeChange = false;
- // Ignore the default edge for now.
- for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) {
- ConstantInt *DestVal = DestSI->getCaseValue(i);
- BasicBlock *DestSucc = DestSI->getSuccessor(i);
-
- // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if
- // PredSI has an explicit case for it. If so, forward. If it is covered
- // by the default case, we can't update PredSI.
- unsigned PredCase = PredSI->findCaseValue(DestVal);
- if (PredCase == 0) continue;
-
- // If PredSI doesn't go to DestBB on this value, then it won't reach the
- // case on this condition.
- if (PredSI->getSuccessor(PredCase) != DestBB &&
- DestSI->getSuccessor(i) != DestBB)
- continue;
-
- // Do not forward this if it already goes to this destination, this would
- // be an infinite loop.
- if (PredSI->getSuccessor(PredCase) == DestSucc)
- continue;
-
- // Otherwise, we're safe to make the change. Make sure that the edge from
- // DestSI to DestSucc is not critical and has no PHI nodes.
- DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
- DEBUG(dbgs() << "THROUGH: " << *DestSI);
-
- // If the destination has PHI nodes, just split the edge for updating
- // simplicity.
- if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){
- SplitCriticalEdge(DestSI, i, this);
- DestSucc = DestSI->getSuccessor(i);
- }
- FoldSingleEntryPHINodes(DestSucc);
- PredSI->setSuccessor(PredCase, DestSucc);
- MadeChange = true;
- }
-
- if (MadeChange)
- return true;
- }
-
- return false;
-}
-
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
/// load instruction, eliminate it by replacing it with a PHI node. This is an
/// important optimization that encourages jump threading, and needs to be run
/// interlaced with other jump threading tasks.
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
- // Don't hack volatile loads.
- if (LI->isVolatile()) return false;
+ // Don't hack volatile/atomic loads.
+ if (!LI->isSimple()) return false;
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
if (BBIt != LoadBB->begin())
return false;
+ // If all of the loads and stores that feed the value have the same TBAA tag,
+ // then we can propagate it onto any newly inserted loads.
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ MDNode *ThisTBAATag = 0;
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ 0, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
+ // If tbaa tags disagree or are not present, forget about them.
+ if (TBAATag != ThisTBAATag) TBAATag = 0;
+
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
+ NewVal->setDebugLoc(LI->getDebugLoc());
+ if (TBAATag)
+ NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
// Create a PHI node at the start of the block for the PRE'd load value.
- PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
+ pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+ LoadBB->begin());
PN->takeName(LI);
+ PN->setDebugLoc(LI->getDebugLoc());
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
- for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
- ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
}
}
- // Okay, now we know the most popular destination. If there is more than
+ // Okay, now we know the most popular destination. If there is more than one
// destination, we need to determine one. This is arbitrary, but we need
// to make a deterministic decision. Pick the first one that appears in the
// successor list.
return MostPopularDest;
}
-bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
return false;
PredValueInfoTy PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
return false;
assert(!PredValues.empty() &&
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
- else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
+ } else {
+ assert(isa<IndirectBrInst>(BB->getTerminator())
+ && "Unexpected terminator");
+ DestBB = cast<BlockAddress>(Val)->getBasicBlock();
}
// If we have exactly one destination, remember it for efficiency below.
- if (i == 0)
+ if (PredToDestList.empty())
OnlyDest = DestBB;
else if (OnlyDest != DestBB)
OnlyDest = MultipleDestSentinel;
PredValueInfoTy XorOpValues;
bool isLHS = true;
- if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ WantInteger)) {
assert(XorOpValues.empty());
- if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ WantInteger))
return false;
isLHS = false;
}
return false;
}
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
if (JumpThreadCost > Threshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
- BranchInst::Create(SuccBB, NewBB);
+ BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, TD);
+ SimplifyInstructionsInBlock(NewBB, TD, TLI);
// Threaded an edge!
++NumThreads;
return false;
}
- unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
if (DuplicationCost > Threshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end