/** See llvm::createAggressiveDCEPass function. */
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
+/** See llvm::createBitTrackingDCEPass function. */
+void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM);
+
/** See llvm::createAlignmentFromAssumptionsPass function. */
void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM);
void initializeAAEvalPass(PassRegistry&);
void initializeAddDiscriminatorsPass(PassRegistry&);
void initializeADCEPass(PassRegistry&);
+void initializeBDCEPass(PassRegistry&);
void initializeAliasAnalysisAnalysisGroup(PassRegistry&);
void initializeAliasAnalysisCounterPass(PassRegistry&);
void initializeAliasDebuggerPass(PassRegistry&);
(void) llvm::createAAEvalPass();
(void) llvm::createAggressiveDCEPass();
+ (void) llvm::createBitTrackingDCEPass();
(void) llvm::createAliasAnalysisCounterPass();
(void) llvm::createAliasDebugger();
(void) llvm::createArgumentPromotionPass();
//
FunctionPass *createAggressiveDCEPass();
+//===----------------------------------------------------------------------===//
+//
+// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to
+// remove computations of dead bits.
+//
+FunctionPass *createBitTrackingDCEPass();
+
//===----------------------------------------------------------------------===//
//
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
+
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());
--- /dev/null
+//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Bit-Tracking Dead Code Elimination pass. Some
+// instructions (shifts, some ands, ors, etc.) kill some of their input bits.
+// We track these dead bits and remove instructions that compute only these
+// dead bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bdce"
+
+STATISTIC(NumRemoved, "Number of instructions removed (unused)");
+STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
+
+namespace {
+struct BDCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BDCE() : FunctionPass(ID) {
+ initializeBDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function& F) override;
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+
+ void determineLiveOperandBits(const Instruction *UserI,
+ const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2);
+
+ AssumptionCache *AC;
+ const DataLayout *DL;
+ DominatorTree *DT;
+};
+}
+
+char BDCE::ID = 0;
+INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+ false, false)
+
+static bool isAlwaysLive(Instruction *I) {
+ return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ isa<LandingPadInst>(I) || I->mayHaveSideEffects();
+}
+
+void BDCE::determineLiveOperandBits(const Instruction *UserI,
+ const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2) {
+ unsigned BitWidth = AB.getBitWidth();
+
+ // We're called once per operand, but for some instructions, we need to
+ // compute known bits of both operands in order to determine the live bits of
+ // either (when both operands are instructions themselves). We don't,
+ // however, want to do this twice, so we cache the result in APInts that live
+ // in the caller. For the two-relevant-operands case, both operand values are
+ // provided here.
+ auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1,
+ const Value *V2) {
+ KnownZero = APInt(BitWidth, 0);
+ KnownOne = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value*>(V1), KnownZero, KnownOne, DL, 0, AC,
+ UserI, DT);
+
+ if (V2) {
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value*>(V2), KnownZero2, KnownOne2, DL, 0, AC,
+ UserI, DT);
+ }
+ };
+
+ switch (UserI->getOpcode()) {
+ default: break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // The alive bits of the input are the swapped alive bits of
+ // the output.
+ AB = AOut.byteSwap();
+ break;
+ case Intrinsic::ctlz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the left of, and including, the leftmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getHighBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ }
+ break;
+ case Intrinsic::cttz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the right of, and including, the rightmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getLowBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ }
+ break;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Find the highest live output bit. We don't need any more input
+ // bits than that (adds, and thus subtracts, ripple only to the
+ // left).
+ AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+ break;
+ case Instruction::Shl:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.lshr(ShiftAmt);
+
+ // If the shift is nuw/nsw, then the high bits are not dead
+ // (because we've promised that they *must* be zero).
+ const ShlOperator *S = cast<ShlOperator>(UserI);
+ if (S->hasNoSignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (S->hasNoUnsignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<LShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::AShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<AShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::And:
+ AB = AOut;
+
+ // For bits that are known zero, the corresponding bits in the
+ // other operand are dead (unless they're both zero, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownZero2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownZero & ~KnownZero2);
+ }
+ break;
+ case Instruction::Or:
+ AB = AOut;
+
+ // For bits that are known one, the corresponding bits in the
+ // other operand are dead (unless they're both one, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownOne2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownOne & ~KnownOne2);
+ }
+ break;
+ case Instruction::Xor:
+ case Instruction::PHI:
+ AB = AOut;
+ break;
+ case Instruction::Trunc:
+ AB = AOut.zext(BitWidth);
+ break;
+ case Instruction::ZExt:
+ AB = AOut.trunc(BitWidth);
+ break;
+ case Instruction::SExt:
+ AB = AOut.trunc(BitWidth);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+ AOut.getBitWidth() - BitWidth))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+ break;
+ case Instruction::Select:
+ if (OperandNo != 0)
+ AB = AOut;
+ break;
+ }
+}
+
+bool BDCE::runOnFunction(Function& F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DL = F.getParent()->getDataLayout();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ DenseMap<Instruction *, APInt> AliveBits;
+ SmallVector<Instruction*, 128> Worklist;
+
+ // The set of visited instructions (non-integer-typed only).
+ SmallPtrSet<Instruction*, 128> Visited;
+
+ // Collect the set of "root" instructions that are known live.
+ for (Instruction &I : inst_range(F)) {
+ if (!isAlwaysLive(&I))
+ continue;
+
+ // For integer-valued instructions, set up an initial empty set of alive
+ // bits and add the instruction to the work list. For other instructions
+ // add their operands to the work list (for integer values operands, mark
+ // all bits as live).
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ Worklist.push_back(&I);
+ continue;
+ }
+
+ // Non-integer-typed instructions...
+ for (Use &OI : I.operands()) {
+ if (Instruction *J = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+ AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+ Worklist.push_back(J);
+ }
+ }
+ // To save memory, we don't add I to the Visited set here. Instead, we
+ // check isAlwaysLive on every instruction when searching for dead
+ // instructions later (we need to check isAlwaysLive for the
+ // integer-typed instructions anyway).
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!Worklist.empty()) {
+ Instruction *UserI = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
+ APInt AOut;
+ if (UserI->getType()->isIntegerTy()) {
+ AOut = AliveBits[UserI];
+ DEBUG(dbgs() << " Alive Out: " << AOut);
+ }
+ DEBUG(dbgs() << "\n");
+
+ if (!UserI->getType()->isIntegerTy())
+ Visited.insert(UserI);
+
+ APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+ // Compute the set of alive bits for each operand. These are anded into the
+ // existing set, if any, and if that changes the set of alive bits, the
+ // operand is added to the work-list.
+ for (Use &OI : UserI->operands()) {
+ if (Instruction *I = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+ unsigned BitWidth = IT->getBitWidth();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+ if (UserI->getType()->isIntegerTy() && !AOut &&
+ !isAlwaysLive(UserI)) {
+ AB = APInt(BitWidth, 0);
+ } else {
+ // If all bits of the output are dead, then all bits of the input
+ // Bits of each operand that are used to compute alive bits of the
+ // output are alive, all others are dead.
+ determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
+ KnownZero, KnownOne,
+ KnownZero2, KnownOne2);
+ }
+
+ // If we've added to the set of alive bits (or the operand has not
+ // been previously visited), then re-queue the operand to be visited
+ // again.
+ APInt ABPrev(BitWidth, 0);
+ auto ABI = AliveBits.find(I);
+ if (ABI != AliveBits.end())
+ ABPrev = ABI->second;
+
+ APInt ABNew = AB | ABPrev;
+ if (ABNew != ABPrev || ABI == AliveBits.end()) {
+ AliveBits[I] = std::move(ABNew);
+ Worklist.push_back(I);
+ }
+ } else if (!Visited.count(I)) {
+ Worklist.push_back(I);
+ }
+ }
+ }
+ }
+
+ bool Changed = false;
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the Worklist vector here for memory efficiency.
+ for (Instruction &I : inst_range(F)) {
+ // For live instructions that have all dead bits, first make them dead by
+ // replacing all uses with something else. Then, if they don't need to
+ // remain live (because they have side effects, etc.) we can remove them.
+ if (I.getType()->isIntegerTy()) {
+ auto ABI = AliveBits.find(&I);
+ if (ABI != AliveBits.end()) {
+ if (ABI->second.getBoolValue())
+ continue;
+
+ DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // FIXME: In theory we could substitute undef here instead of zero.
+ // This should be reconsidered once we settle on the semantics of
+ // undef, poison, etc.
+ Value *Zero = ConstantInt::get(I.getType(), 0);
+ ++NumSimplified;
+ I.replaceAllUsesWith(Zero);
+ Changed = true;
+ }
+ } else if (Visited.count(&I)) {
+ continue;
+ }
+
+ if (isAlwaysLive(&I))
+ continue;
+
+ DEBUG(dbgs() << "BDCE: Removing: " << I << " (unused)\n");
+ Worklist.push_back(&I);
+ I.dropAllReferences();
+ Changed = true;
+ }
+
+ for (Instruction *&I : Worklist) {
+ ++NumRemoved;
+ I->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createBitTrackingDCEPass() {
+ return new BDCE();
+}
+
add_llvm_library(LLVMScalarOpts
ADCE.cpp
AlignmentFromAssumptions.cpp
+ BDCE.cpp
ConstantHoisting.cpp
ConstantProp.cpp
CorrelatedValuePropagation.cpp
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
+ initializeBDCEPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
unwrap(PM)->add(createAggressiveDCEPass());
}
+void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBitTrackingDCEPass());
+}
+
void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAlignmentFromAssumptionsPass());
}
--- /dev/null
+; RUN: opt -S -bdce -instsimplify < %s | FileCheck %s
+; RUN: opt -S -instsimplify < %s | FileCheck %s -check-prefix=CHECK-IO
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @bar(i32 signext %x) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 4
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 8
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %shr = ashr i32 %or15, 4
+ ret i32 %shr
+
+; CHECK-LABEL: @bar
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+
+; Check that instsimplify is not doing this all on its own.
+; CHECK-IO-LABEL: @bar
+; CHECK-IO: tail call signext i32 @foo(i32 signext 5)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 3)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 2)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 1)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 0)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 4)
+; CHECK-IO: ret i32
+}
+
+; Function Attrs: nounwind readnone
+declare signext i32 @foo(i32 signext) #0
+
+; Function Attrs: nounwind readnone
+define signext i32 @far(i32 signext %x) #1 {
+entry:
+ %call = tail call signext i32 @goo(i32 signext 5) #1
+ %and = and i32 %call, 4
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @goo(i32 signext 3) #1
+ %and2 = and i32 %call1, 8
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @goo(i32 signext 2) #1
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @goo(i32 signext 1) #1
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @goo(i32 signext 0) #1
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @goo(i32 signext 4) #1
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %shr = ashr i32 %or15, 4
+ ret i32 %shr
+
+; CHECK-LABEL: @far
+; Calls to foo(5) and foo(3) are still there, but their results are not used.
+; CHECK: tail call signext i32 @goo(i32 signext 5)
+; CHECK-NEXT: tail call signext i32 @goo(i32 signext 3)
+; CHECK-NEXT: tail call signext i32 @goo(i32 signext 2)
+; CHECK: tail call signext i32 @goo(i32 signext 1)
+; CHECK: tail call signext i32 @goo(i32 signext 0)
+; CHECK: tail call signext i32 @goo(i32 signext 4)
+; CHECK: ret i32
+
+; Check that instsimplify is not doing this all on its own.
+; CHECK-IO-LABEL: @far
+; CHECK-IO: tail call signext i32 @goo(i32 signext 5)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 3)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 2)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 1)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 0)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 4)
+; CHECK-IO: ret i32
+}
+
+declare signext i32 @goo(i32 signext) #1
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar1(i32 signext %x) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 33554432
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 67108864
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %bs = tail call i32 @llvm.bswap.i32(i32 %or15) #0
+ %shr = ashr i32 %bs, 4
+ ret i32 %shr
+
+; CHECK-LABEL: @tar1
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.bswap.i32(i32) #0
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar2(i32 signext %x) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 33554432
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 67108864
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %shl = shl i32 %or15, 10
+ ret i32 %shl
+
+; CHECK-LABEL: @tar2
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar3(i32 signext %x) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 33554432
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 67108864
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %add = add i32 %or15, 5
+ %shl = shl i32 %add, 10
+ ret i32 %shl
+
+; CHECK-LABEL: @tar3
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar4(i32 signext %x) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 33554432
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 67108864
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %sub = sub i32 %or15, 5
+ %shl = shl i32 %sub, 10
+ ret i32 %shl
+
+; CHECK-LABEL: @tar4
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar5(i32 signext %x) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 33554432
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 67108864
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %xor = xor i32 %or15, 5
+ %shl = shl i32 %xor, 10
+ ret i32 %shl
+
+; CHECK-LABEL: @tar5
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar7(i32 signext %x, i1 %b) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 33554432
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 67108864
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %v = select i1 %b, i32 %or15, i32 5
+ %shl = shl i32 %v, 10
+ ret i32 %shl
+
+; CHECK-LABEL: @tar7
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i16 @tar8(i32 signext %x) #0 {
+entry:
+ %call = tail call signext i32 @foo(i32 signext 5) #0
+ %and = and i32 %call, 33554432
+ %or = or i32 %and, %x
+ %call1 = tail call signext i32 @foo(i32 signext 3) #0
+ %and2 = and i32 %call1, 67108864
+ %or3 = or i32 %or, %and2
+ %call4 = tail call signext i32 @foo(i32 signext 2) #0
+ %and5 = and i32 %call4, 16
+ %or6 = or i32 %or3, %and5
+ %call7 = tail call signext i32 @foo(i32 signext 1) #0
+ %and8 = and i32 %call7, 32
+ %or9 = or i32 %or6, %and8
+ %call10 = tail call signext i32 @foo(i32 signext 0) #0
+ %and11 = and i32 %call10, 64
+ %or12 = or i32 %or9, %and11
+ %call13 = tail call signext i32 @foo(i32 signext 4) #0
+ %and14 = and i32 %call13, 128
+ %or15 = or i32 %or12, %and14
+ %tr = trunc i32 %or15 to i16
+ ret i16 %tr
+
+; CHECK-LABEL: @tar8
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i16
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
--- /dev/null
+; RUN: opt -bdce -S < %s | FileCheck %s
+
+declare i32 @strlen(i8*) readonly nounwind
+
+define void @test1() {
+ call i32 @strlen( i8* null )
+ ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: call
+; CHECK: ret void
+}
+
+define i32 @test2() {
+ ; invoke of pure function should not be deleted!
+ invoke i32 @strlen( i8* null ) readnone
+ to label %Cont unwind label %Other
+
+Cont: ; preds = %0
+ ret i32 0
+
+Other: ; preds = %0
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
+ ret i32 1
+
+; CHECK-LABEL: @test2
+; CHECK: invoke
+; CHECK: ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+