#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+// Turn on use of LazyValueInfo.
+static cl::opt<bool>
+EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+
+
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
JumpThreading() : FunctionPass(&ID) {}
bool runOnFunction(Function &F);
- void FindLoopHeaders(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (EnableLVI)
+ AU.addRequired<LazyValueInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB);
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
PredValueInfo &Result);
- bool ProcessThreadableEdges(Instruction *CondInst, BasicBlock *BB);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
- DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
+ DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
FindLoopHeaders(F);
// edges which simplifies the CFG.
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
- DEBUG(errs() << " JT: Deleting dead block '" << BB->getName()
+ DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
if (BBI->isTerminator()) {
// Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
// block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward in the rare case when the block isn't deleted.
+ // reinsert afterward if needed.
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
- else if (ErasedFromLoopHeaders)
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
+ }
+
+ if (ErasedFromLoopHeaders)
LoopHeaders.insert(BB);
}
}
/// predecessors. If so, return the known list of value and pred BB in the
/// result vector. If a value is known to be undef, it is returned as null.
///
-/// The BB basic block is known to start with a PHI node.
-///
/// This returns true if there were any known values.
///
-///
-/// TODO: Per PR2563, we could infer value range information about a predecessor
-/// based on its terminator.
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
- PHINode *TheFirstPHI = cast<PHINode>(BB->begin());
-
// If V is a constantint, then it is known in all predecessors.
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- Result.resize(TheFirstPHI->getNumIncomingValues());
- for (unsigned i = 0, e = Result.size(); i != e; ++i)
- Result[i] = std::make_pair(CI, TheFirstPHI->getIncomingBlock(i));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CI, *PI));
return true;
}
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || I->getParent() != BB)
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ if (LVI) {
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ if (PredCst == 0 ||
+ (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+ continue;
+
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ }
+
+ return !Result.empty();
+ }
+
return false;
+ }
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
// Invert the known values.
for (unsigned i = 0, e = Result.size(); i != e; ++i)
- Result[i].first =
- cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+ if (Result[i].first)
+ Result[i].first =
+ cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
return true;
}
}
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS);
- if (Res == 0) continue;
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!LVI || !isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
if (isa<UndefValue>(Res))
Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
return !Result.empty();
}
- // TODO: We could also recurse to see if we can determine constants another
- // way.
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+ Cmp->getType()->isInteger() && // Not vector compare.
+ (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate
+ Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, *PI, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ }
+
+ return !Result.empty();
+ }
}
return false;
}
// terminator to an unconditional branch. This can occur due to threading in
// other blocks.
if (isa<ConstantInt>(Condition)) {
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding terminator: " << *BB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(BB);
RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
}
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
// br COND, BBX, BBY
// BBX:
// br COND, BBZ, BBW
- if (!Condition->hasOneUse() && // Multiple uses.
+ if (!LVI &&
+ !Condition->hasOneUse() && // Multiple uses.
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
}
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0)
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (LVI && ProcessThreadableEdges(Condition, BB))
+ return true;
return false;
+ }
+
// See if this is a phi node in the current block.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
return ProcessJumpOnPHI(PN);
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (!isa<PHINode>(CondCmp->getOperand(0)) ||
- cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB) {
+ if (!LVI &&
+ (!isa<PHINode>(CondCmp->getOperand(0)) ||
+ cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
// If we have a comparison, loop over the predecessors to see if there is
// a condition with a lexically identical value.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
//
- // We only bother doing this if the current block has a PHI node and if the
- // conditional instruction lives in the current block. If either condition
- // fails, this won't be a computable value anyway.
- if (CondInst->getParent() == BB && isa<PHINode>(BB->front()))
- if (ProcessThreadableEdges(CondInst, BB))
- return true;
+ if (ProcessThreadableEdges(CondInst, BB))
+ return true;
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
else if (PredBI->getSuccessor(0) != BB)
BranchDir = false;
else {
- DEBUG(errs() << " In block '" << PredBB->getName()
+ DEBUG(dbgs() << " In block '" << PredBB->getName()
<< "' folding terminator: " << *PredBB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(PredBB);
// If the dest block has one predecessor, just fix the branch condition to a
// constant and fold it.
if (BB->getSinglePredecessor()) {
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding condition to '" << BranchDir << "': "
<< *BB->getTerminator() << '\n');
++NumFolds;
if (PredSI->getSuccessor(PredCase) != DestBB &&
DestSI->getSuccessor(i) != DestBB)
continue;
+
+ // Do not forward this if it already goes to this destination, this would
+ // be an infinite loop.
+ if (PredSI->getSuccessor(PredCase) == DestSucc)
+ continue;
// Otherwise, we're safe to make the change. Make sure that the edge from
// DestSI to DestSucc is not critical and has no PHI nodes.
- DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
- DEBUG(errs() << "THROUGH: " << *DestSI);
+ DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
+ DEBUG(dbgs() << "THROUGH: " << *DestSI);
// If the destination has PHI nodes, just split the edge for updating
// simplicity.
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
- // FIXME: Could do PHI translation, that would be fun :)
+ // TODO: Could do simple PHI translation, that would be fun :)
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
- BBIt, 6)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value if the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-split", this);
+ "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr",
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
return MostPopularDest;
}
-bool JumpThreading::ProcessThreadableEdges(Instruction *CondInst,
- BasicBlock *BB) {
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
return false;
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
- if (!ComputeValueKnownInPredecessors(CondInst, BB, PredValues))
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
return false;
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
- DEBUG(errs() << "IN BB: " << *BB;
+ DEBUG(dbgs() << "IN BB: " << *BB;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- errs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = ";
if (PredValues[i].first)
- errs() << *PredValues[i].first;
+ dbgs() << *PredValues[i].first;
else
- errs() << "UNDEF";
- errs() << " for pred '" << PredValues[i].second->getName()
+ dbgs() << "UNDEF";
+ dbgs() << " for pred '" << PredValues[i].second->getName()
<< "'.\n";
});
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
- DEBUG(errs() << " Not threading across BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
<< "' - would thread to self!\n");
return false;
}
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not threading across loop header BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
if (JumpThreadCost > Threshold) {
- DEBUG(errs() << " Not threading BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
}
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
- DEBUG(errs() << " Factoring out " << PredBBs.size()
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
// And finally, do it!
- DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
<< *BB << "\n");
if (UsesToRename.empty())
continue;
- DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
}
// cause us to transform this into an irreducible loop, don't do this.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not duplicating loop header '" << BB->getName()
+ DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
<< "' into predecessor block '" << PredBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
if (DuplicationCost > Threshold) {
- DEBUG(errs() << " Not duplicating BB '" << BB->getName()
+ DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
- DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
if (UsesToRename.empty())
continue;
- DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
}
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge