1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass performs a simple dominator tree walk that eliminates trivially
11 // redundant instructions.
13 //===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
#include <utility>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "early-cse"

// Counters reported under -stats; each is incremented by the pass as the
// corresponding transformation fires.
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");
43 static unsigned getHash(const void *V) {
44 return DenseMapInfo<const void*>::getHashValue(V);
47 //===----------------------------------------------------------------------===//
49 //===----------------------------------------------------------------------===//
52 /// SimpleValue - Instances of this struct represent available values in the
53 /// scoped hash table.
57 SimpleValue(Instruction *I) : Inst(I) {
58 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
61 bool isSentinel() const {
62 return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
63 Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
66 static bool canHandle(Instruction *Inst) {
67 // This can only handle non-void readnone functions.
68 if (CallInst *CI = dyn_cast<CallInst>(Inst))
69 return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
70 return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
71 isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
72 isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
73 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
74 isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
80 template<> struct DenseMapInfo<SimpleValue> {
81 static inline SimpleValue getEmptyKey() {
82 return DenseMapInfo<Instruction*>::getEmptyKey();
84 static inline SimpleValue getTombstoneKey() {
85 return DenseMapInfo<Instruction*>::getTombstoneKey();
87 static unsigned getHashValue(SimpleValue Val);
88 static bool isEqual(SimpleValue LHS, SimpleValue RHS);
92 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
93 Instruction *Inst = Val.Inst;
94 // Hash in all of the operands as pointers.
95 if (BinaryOperator* BinOp = dyn_cast<BinaryOperator>(Inst)) {
96 Value *LHS = BinOp->getOperand(0);
97 Value *RHS = BinOp->getOperand(1);
98 if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
101 if (isa<OverflowingBinaryOperator>(BinOp)) {
102 // Hash the overflow behavior
104 BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
105 BinOp->hasNoUnsignedWrap() * OverflowingBinaryOperator::NoUnsignedWrap;
106 return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
109 return hash_combine(BinOp->getOpcode(), LHS, RHS);
112 if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
113 Value *LHS = CI->getOperand(0);
114 Value *RHS = CI->getOperand(1);
115 CmpInst::Predicate Pred = CI->getPredicate();
116 if (Inst->getOperand(0) > Inst->getOperand(1)) {
118 Pred = CI->getSwappedPredicate();
120 return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
123 if (CastInst *CI = dyn_cast<CastInst>(Inst))
124 return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
126 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
127 return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
128 hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
130 if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
131 return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
133 hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
135 assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
136 isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
137 isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
138 isa<ShuffleVectorInst>(Inst)) && "Invalid/unknown instruction");
140 // Mix in the opcode.
141 return hash_combine(Inst->getOpcode(),
142 hash_combine_range(Inst->value_op_begin(),
143 Inst->value_op_end()));
146 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
147 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
149 if (LHS.isSentinel() || RHS.isSentinel())
152 if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
153 if (LHSI->isIdenticalTo(RHSI)) return true;
155 // If we're not strictly identical, we still might be a commutable instruction
156 if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
157 if (!LHSBinOp->isCommutative())
160 assert(isa<BinaryOperator>(RHSI)
161 && "same opcode, but different instruction type?");
162 BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
164 // Check overflow attributes
165 if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
166 assert(isa<OverflowingBinaryOperator>(RHSBinOp)
167 && "same opcode, but different operator type?");
168 if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
169 LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
174 return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
175 LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
177 if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
178 assert(isa<CmpInst>(RHSI)
179 && "same opcode, but different instruction type?");
180 CmpInst *RHSCmp = cast<CmpInst>(RHSI);
182 return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
183 LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
184 LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
190 //===----------------------------------------------------------------------===//
192 //===----------------------------------------------------------------------===//
195 /// CallValue - Instances of this struct represent available call values in
196 /// the scoped hash table.
200 CallValue(Instruction *I) : Inst(I) {
201 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
204 bool isSentinel() const {
205 return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
206 Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
209 static bool canHandle(Instruction *Inst) {
210 // Don't value number anything that returns void.
211 if (Inst->getType()->isVoidTy())
214 CallInst *CI = dyn_cast<CallInst>(Inst);
215 if (!CI || !CI->onlyReadsMemory())
223 template<> struct DenseMapInfo<CallValue> {
224 static inline CallValue getEmptyKey() {
225 return DenseMapInfo<Instruction*>::getEmptyKey();
227 static inline CallValue getTombstoneKey() {
228 return DenseMapInfo<Instruction*>::getTombstoneKey();
230 static unsigned getHashValue(CallValue Val);
231 static bool isEqual(CallValue LHS, CallValue RHS);
234 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
235 Instruction *Inst = Val.Inst;
236 // Hash in all of the operands as pointers.
238 for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
239 assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
240 "Cannot value number calls with metadata operands");
241 Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
244 // Mix in the opcode.
245 return (Res << 1) ^ Inst->getOpcode();
248 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
249 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
250 if (LHS.isSentinel() || RHS.isSentinel())
252 return LHSI->isIdenticalTo(RHSI);
256 //===----------------------------------------------------------------------===//
258 //===----------------------------------------------------------------------===//
262 /// EarlyCSE - This pass does a simple depth-first walk over the dominator
263 /// tree, eliminating trivially redundant instructions and using instsimplify
264 /// to canonicalize things as it goes. It is intended to be fast and catch
265 /// obvious cases so that instcombine and other passes are more effective. It
266 /// is expected that a later pass of GVN will catch the interesting/hard
268 class EarlyCSE : public FunctionPass {
270 const DataLayout *DL;
271 const TargetLibraryInfo *TLI;
273 AssumptionTracker *AT;
274 typedef RecyclingAllocator<BumpPtrAllocator,
275 ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
276 typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
277 AllocatorTy> ScopedHTType;
279 /// AvailableValues - This scoped hash table contains the current values of
280 /// all of our simple scalar expressions. As we walk down the domtree, we
281 /// look to see if instructions are in this: if so, we replace them with what
282 /// we find, otherwise we insert them so that dominated values can succeed in
284 ScopedHTType *AvailableValues;
286 /// AvailableLoads - This scoped hash table contains the current values
287 /// of loads. This allows us to get efficient access to dominating loads when
288 /// we have a fully redundant load. In addition to the most recent load, we
289 /// keep track of a generation count of the read, which is compared against
290 /// the current generation count. The current generation count is
291 /// incremented after every possibly writing memory operation, which ensures
292 /// that we only CSE loads with other loads that have no intervening store.
293 typedef RecyclingAllocator<BumpPtrAllocator,
294 ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
295 typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
296 DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
297 LoadHTType *AvailableLoads;
299 /// AvailableCalls - This scoped hash table contains the current values
300 /// of read-only call values. It uses the same generation count as loads.
301 typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
302 CallHTType *AvailableCalls;
304 /// CurrentGeneration - This is the current generation of the memory value.
305 unsigned CurrentGeneration;
308 explicit EarlyCSE() : FunctionPass(ID) {
309 initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
312 bool runOnFunction(Function &F) override;
316 // NodeScope - almost a POD, but needs to call the constructors for the
317 // scoped hash tables so that a new scope gets pushed on. These are RAII so
318 // that the scope gets popped when the NodeScope is destroyed.
321 NodeScope(ScopedHTType *availableValues,
322 LoadHTType *availableLoads,
323 CallHTType *availableCalls) :
324 Scope(*availableValues),
325 LoadScope(*availableLoads),
326 CallScope(*availableCalls) {}
329 NodeScope(const NodeScope&) LLVM_DELETED_FUNCTION;
330 void operator=(const NodeScope&) LLVM_DELETED_FUNCTION;
332 ScopedHTType::ScopeTy Scope;
333 LoadHTType::ScopeTy LoadScope;
334 CallHTType::ScopeTy CallScope;
337 // StackNode - contains all the needed information to create a stack for
338 // doing a depth first tranversal of the tree. This includes scopes for
339 // values, loads, and calls as well as the generation. There is a child
340 // iterator so that the children do not need to be store spearately.
343 StackNode(ScopedHTType *availableValues,
344 LoadHTType *availableLoads,
345 CallHTType *availableCalls,
346 unsigned cg, DomTreeNode *n,
347 DomTreeNode::iterator child, DomTreeNode::iterator end) :
348 CurrentGeneration(cg), ChildGeneration(cg), Node(n),
349 ChildIter(child), EndIter(end),
350 Scopes(availableValues, availableLoads, availableCalls),
354 unsigned currentGeneration() { return CurrentGeneration; }
355 unsigned childGeneration() { return ChildGeneration; }
356 void childGeneration(unsigned generation) { ChildGeneration = generation; }
357 DomTreeNode *node() { return Node; }
358 DomTreeNode::iterator childIter() { return ChildIter; }
359 DomTreeNode *nextChild() {
360 DomTreeNode *child = *ChildIter;
364 DomTreeNode::iterator end() { return EndIter; }
365 bool isProcessed() { return Processed; }
366 void process() { Processed = true; }
369 StackNode(const StackNode&) LLVM_DELETED_FUNCTION;
370 void operator=(const StackNode&) LLVM_DELETED_FUNCTION;
373 unsigned CurrentGeneration;
374 unsigned ChildGeneration;
376 DomTreeNode::iterator ChildIter;
377 DomTreeNode::iterator EndIter;
382 bool processNode(DomTreeNode *Node);
384 // This transformation requires dominator postdominator info
385 void getAnalysisUsage(AnalysisUsage &AU) const override {
386 AU.addRequired<AssumptionTracker>();
387 AU.addRequired<DominatorTreeWrapperPass>();
388 AU.addRequired<TargetLibraryInfo>();
389 AU.setPreservesCFG();
394 char EarlyCSE::ID = 0;
396 // createEarlyCSEPass - The public interface to this file.
397 FunctionPass *llvm::createEarlyCSEPass() {
398 return new EarlyCSE();
// Register the pass and its analysis dependencies with the PassRegistry.
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
407 bool EarlyCSE::processNode(DomTreeNode *Node) {
408 BasicBlock *BB = Node->getBlock();
410 // If this block has a single predecessor, then the predecessor is the parent
411 // of the domtree node and all of the live out memory values are still current
412 // in this block. If this block has multiple predecessors, then they could
413 // have invalidated the live-out memory values of our parent value. For now,
414 // just be conservative and invalidate memory if this block has multiple
416 if (!BB->getSinglePredecessor())
419 /// LastStore - Keep track of the last non-volatile store that we saw... for
420 /// as long as there in no instruction that reads memory. If we see a store
421 /// to the same location, we delete the dead store. This zaps trivial dead
422 /// stores which can occur in bitfield code among other things.
423 StoreInst *LastStore = nullptr;
425 bool Changed = false;
427 // See if any instructions in the block can be eliminated. If so, do it. If
428 // not, add them to AvailableValues.
429 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
430 Instruction *Inst = I++;
432 // Dead instructions should just be removed.
433 if (isInstructionTriviallyDead(Inst, TLI)) {
434 DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
435 Inst->eraseFromParent();
441 // Skip assume intrinsics, they don't really have side effects (although
442 // they're marked as such to ensure preservation of control dependencies),
443 // and this pass will not disturb any of the assumption's control
445 if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
446 DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
450 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
451 // its simpler value.
452 if (Value *V = SimplifyInstruction(Inst, DL, TLI, DT, AT)) {
453 DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
454 Inst->replaceAllUsesWith(V);
455 Inst->eraseFromParent();
461 // If this is a simple instruction that we can value number, process it.
462 if (SimpleValue::canHandle(Inst)) {
463 // See if the instruction has an available value. If so, use it.
464 if (Value *V = AvailableValues->lookup(Inst)) {
465 DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
466 Inst->replaceAllUsesWith(V);
467 Inst->eraseFromParent();
473 // Otherwise, just remember that this value is available.
474 AvailableValues->insert(Inst, Inst);
478 // If this is a non-volatile load, process it.
479 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
480 // Ignore volatile loads.
481 if (!LI->isSimple()) {
486 // If we have an available version of this load, and if it is the right
487 // generation, replace this instruction.
488 std::pair<Value*, unsigned> InVal =
489 AvailableLoads->lookup(Inst->getOperand(0));
490 if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
491 DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
492 << *InVal.first << '\n');
493 if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
494 Inst->eraseFromParent();
500 // Otherwise, remember that we have this instruction.
501 AvailableLoads->insert(Inst->getOperand(0),
502 std::pair<Value*, unsigned>(Inst, CurrentGeneration));
507 // If this instruction may read from memory, forget LastStore.
508 if (Inst->mayReadFromMemory())
511 // If this is a read-only call, process it.
512 if (CallValue::canHandle(Inst)) {
513 // If we have an available version of this call, and if it is the right
514 // generation, replace this instruction.
515 std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
516 if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
517 DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
518 << *InVal.first << '\n');
519 if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
520 Inst->eraseFromParent();
526 // Otherwise, remember that we have this instruction.
527 AvailableCalls->insert(Inst,
528 std::pair<Value*, unsigned>(Inst, CurrentGeneration));
532 // Okay, this isn't something we can CSE at all. Check to see if it is
533 // something that could modify memory. If so, our available memory values
534 // cannot be used so bump the generation count.
535 if (Inst->mayWriteToMemory()) {
538 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
539 // We do a trivial form of DSE if there are two stores to the same
540 // location with no intervening loads. Delete the earlier store.
542 LastStore->getPointerOperand() == SI->getPointerOperand()) {
543 DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
545 LastStore->eraseFromParent();
549 // fallthrough - we can exploit information about this store
552 // Okay, we just invalidated anything we knew about loaded values. Try
553 // to salvage *something* by remembering that the stored value is a live
554 // version of the pointer. It is safe to forward from volatile stores
555 // to non-volatile loads, so we don't have to check for volatility of
557 AvailableLoads->insert(SI->getPointerOperand(),
558 std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
560 // Remember that this was the last store we saw for DSE.
571 bool EarlyCSE::runOnFunction(Function &F) {
572 if (skipOptnoneFunction(F))
575 // Note, deque is being used here because there is significant performance gains
576 // over vector when the container becomes very large due to the specific access
577 // patterns. For more information see the mailing list discussion on this:
578 // http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
579 std::deque<StackNode *> nodesToProcess;
581 DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
582 DL = DLP ? &DLP->getDataLayout() : nullptr;
583 TLI = &getAnalysis<TargetLibraryInfo>();
584 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
585 AT = &getAnalysis<AssumptionTracker>();
587 // Tables that the pass uses when walking the domtree.
588 ScopedHTType AVTable;
589 AvailableValues = &AVTable;
590 LoadHTType LoadTable;
591 AvailableLoads = &LoadTable;
592 CallHTType CallTable;
593 AvailableCalls = &CallTable;
595 CurrentGeneration = 0;
596 bool Changed = false;
598 // Process the root node.
599 nodesToProcess.push_back(
600 new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
601 CurrentGeneration, DT->getRootNode(),
602 DT->getRootNode()->begin(),
603 DT->getRootNode()->end()));
605 // Save the current generation.
606 unsigned LiveOutGeneration = CurrentGeneration;
608 // Process the stack.
609 while (!nodesToProcess.empty()) {
610 // Grab the first item off the stack. Set the current generation, remove
611 // the node from the stack, and process it.
612 StackNode *NodeToProcess = nodesToProcess.back();
614 // Initialize class members.
615 CurrentGeneration = NodeToProcess->currentGeneration();
617 // Check if the node needs to be processed.
618 if (!NodeToProcess->isProcessed()) {
620 Changed |= processNode(NodeToProcess->node());
621 NodeToProcess->childGeneration(CurrentGeneration);
622 NodeToProcess->process();
623 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
624 // Push the next child onto the stack.
625 DomTreeNode *child = NodeToProcess->nextChild();
626 nodesToProcess.push_back(
627 new StackNode(AvailableValues,
630 NodeToProcess->childGeneration(), child,
631 child->begin(), child->end()));
633 // It has been processed, and there are no more children to process,
634 // so delete it and pop it off the stack.
635 delete NodeToProcess;
636 nodesToProcess.pop_back();
638 } // while (!nodes...)
640 // Reset the current generation.
641 CurrentGeneration = LiveOutGeneration;