X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FCodeGenPrepare.cpp;h=948fddc6ed8d766bd7eeb473d399a22144cd5f9b;hb=f15492fd7292563049ace40be9a2e0048e64b8f0;hp=642fbad1cdf5e9ea0325e579a30e764fd0dc752c;hpb=7d5100d14edd6d1595cc60ce5f89b64bfc564ef4;p=oota-llvm.git diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 642fbad1cdf..948fddc6ed8 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "codegenprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -47,6 +46,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "codegenprepare" + STATISTIC(NumBlocksElim, "Number of blocks eliminated"); STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); @@ -150,19 +151,8 @@ typedef DenseMap InstrToOrigTy; } char CodeGenPrepare::ID = 0; -static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) { - initializeTargetLibraryInfoPass(Registry); - PassInfo *PI = new PassInfo( - "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID, - PassInfo::NormalCtor_t(callDefaultCtor), false, false, - PassInfo::TargetMachineCtor_t(callTargetMachineCtor)); - Registry.registerPass(*PI, true); - return PI; -} - -void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce) -} +INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { return new CodeGenPrepare(TM); @@ -178,7 +168,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PromotedInsts.clear(); ModifiedDT = false; - if (TM) TLI = TM->getTargetLowering(); + if (TM) + TLI = TM->getSubtargetImpl()->getTargetLowering(); TLInfo = &getAnalysis(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); @@ -628,6 +619,190 @@ static bool OptimizeCmpExpression(CmpInst *CI) { return MadeChange; } +/// isExtractBitsCandidateUse - Check if the candidates could +/// be combined with shift instruction, which includes: +/// 1. Truncate instruction +/// 2. And instruction and the imm is a mask of the low bits: +/// imm & (imm+1) == 0 +static bool isExtractBitsCandidateUse(Instruction *User) { + if (!isa(User)) { + if (User->getOpcode() != Instruction::And || + !isa(User->getOperand(1))) + return false; + + const APInt &Cimm = cast(User->getOperand(1))->getValue(); + + if ((Cimm & (Cimm + 1)).getBoolValue()) + return false; + } + return true; +} + +/// SinkShiftAndTruncate - sink both shift and truncate instruction +/// to the use of truncate's BB. +static bool +SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, + DenseMap &InsertedShifts, + const TargetLowering &TLI) { + BasicBlock *UserBB = User->getParent(); + DenseMap InsertedTruncs; + TruncInst *TruncI = dyn_cast(User); + bool MadeChange = false; + + for (Value::user_iterator TruncUI = TruncI->user_begin(), + TruncE = TruncI->user_end(); + TruncUI != TruncE;) { + + Use &TruncTheUse = TruncUI.getUse(); + Instruction *TruncUser = cast(*TruncUI); + // Preincrement use iterator so we don't invalidate it. + + ++TruncUI; + + int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); + if (!ISDOpcode) + continue; + + // If the use is actually a legal node, there will not be an + // implicit truncate. + // FIXME: always querying the result type is just an + // approximation; some nodes' legality is determined by the + // operand or other means. There's no good way to find out though. + if (TLI.isOperationLegalOrCustom(ISDOpcode, + EVT::getEVT(TruncUser->getType(), true))) + continue; + + // Don't bother for PHI nodes. + if (isa(TruncUser)) + continue; + + BasicBlock *TruncUserBB = TruncUser->getParent(); + + if (UserBB == TruncUserBB) + continue; + + BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; + CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; + + if (!InsertedShift && !InsertedTrunc) { + BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + // Sink the shift + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + // Sink the trunc + BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); + TruncInsertPt++; + + InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, + TruncI->getType(), "", TruncInsertPt); + + MadeChange = true; + + TruncTheUse = InsertedTrunc; + } + } + return MadeChange; +} + +/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if +/// the uses could potentially be combined with this shift instruction and +/// generate BitExtract instruction. It will only be applied if the architecture +/// supports BitExtract instruction. Here is an example: +/// BB1: +/// %x.extract.shift = lshr i64 %arg1, 32 +/// BB2: +/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 +/// ==> +/// +/// BB2: +/// %x.extract.shift.1 = lshr i64 %arg1, 32 +/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 +/// +/// CodeGen will recoginze the pattern in BB2 and generate BitExtract +/// instruction. +/// Return true if any changes are made. +static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, + const TargetLowering &TLI) { + BasicBlock *DefBB = ShiftI->getParent(); + + /// Only insert instructions in each block once. + DenseMap InsertedShifts; + + bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); + + bool MadeChange = false; + for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); + UI != E;) { + Use &TheUse = UI.getUse(); + Instruction *User = cast(*UI); + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa(User)) + continue; + + if (!isExtractBitsCandidateUse(User)) + continue; + + BasicBlock *UserBB = User->getParent(); + + if (UserBB == DefBB) { + // If the shift and truncate instruction are in the same BB. The use of + // the truncate(TruncUse) may still introduce another truncate if not + // legal. In this case, we would like to sink both shift and truncate + // instruction to the BB of TruncUse. + // for example: + // BB1: + // i64 shift.result = lshr i64 opnd, imm + // trunc.result = trunc shift.result to i16 + // + // BB2: + // ----> We will have an implicit truncate here if the architecture does + // not have i16 compare. + // cmp i16 trunc.result, opnd2 + // + if (isa(User) && shiftIsLegal + // If the type of the truncate is legal, no trucate will be + // introduced in other basic blocks. + && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) + MadeChange = + SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); + + continue; + } + // If we have already inserted a shift into this block, use it. + BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; + + if (!InsertedShift) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + MadeChange = true; + } + + // Replace a use of the shift with a use of the new shift. + TheUse = InsertedShift; + } + + // If we removed all uses, nuke the shift. + if (ShiftI->use_empty()) + ShiftI->eraseFromParent(); + + return MadeChange; +} + namespace { class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { protected: @@ -688,293 +863,6 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { } return true; } - // Lower all uses of llvm.safe.[us]{div|rem}... - if (II && - (II->getIntrinsicID() == Intrinsic::safe_sdiv || - II->getIntrinsicID() == Intrinsic::safe_udiv || - II->getIntrinsicID() == Intrinsic::safe_srem || - II->getIntrinsicID() == Intrinsic::safe_urem)) { - // Given - // result_struct = type {iN, i1} - // %R = call result_struct llvm.safe.sdiv.iN(iN %x, iN %y) - // Expand it to actual IR, which produces result to the same variable %R. - // First element of the result %R.1 is the result of division, second - // element shows whether the division was correct or not. - // If %y is 0, %R.1 is 0, %R.2 is 1. (1) - // If %x is minSignedValue and %y is -1, %R.1 is %x, %R.2 is 1. (2) - // In other cases %R.1 is (sdiv %x, %y), %R.2 is 0. (3) - // - // Similar applies to srem, udiv, and urem builtins, except that in unsigned - // variants we don't check condition (2). - - bool IsSigned; - BinaryOperator::BinaryOps Op; - switch (II->getIntrinsicID()) { - case Intrinsic::safe_sdiv: - IsSigned = true; - Op = Instruction::SDiv; - break; - case Intrinsic::safe_udiv: - IsSigned = false; - Op = Instruction::UDiv; - break; - case Intrinsic::safe_srem: - IsSigned = true; - Op = Instruction::SRem; - break; - case Intrinsic::safe_urem: - IsSigned = false; - Op = Instruction::URem; - break; - default: - llvm_unreachable("Only Div/Rem intrinsics are handled here."); - } - - Value *LHS = II->getOperand(0), *RHS = II->getOperand(1); - bool DivWellDefined = TLI && TLI->isDivWellDefined(); - - bool ResultNeeded[2] = {false, false}; - SmallVector ResultsUsers[2]; - bool BadCase = false; - for (User *U: II->users()) { - ExtractValueInst *EVI = dyn_cast(U); - if (!EVI || EVI->getNumIndices() > 1 || EVI->getIndices()[0] > 1) { - BadCase = true; - break; - } - ResultNeeded[EVI->getIndices()[0]] = true; - ResultsUsers[EVI->getIndices()[0]].push_back(U); - } - // Behave conservatively, if there is an unusual user of the results. - if (BadCase) - ResultNeeded[0] = ResultNeeded[1] = true; - - // Early exit if non of the results is ever used. - if (!ResultNeeded[0] && !ResultNeeded[1]) { - II->eraseFromParent(); - return true; - } - - // Early exit if the second result (flag) isn't used and target - // div-instruction computes exactly what we want to get as the first result - // and never traps. - if (ResultNeeded[0] && !ResultNeeded[1] && DivWellDefined) { - BinaryOperator *Div = BinaryOperator::Create(Op, LHS, RHS); - Div->insertAfter(II); - for (User *U: ResultsUsers[0]) { - Instruction *UserInst = dyn_cast(U); - assert(UserInst && "Unexpected null-instruction"); - UserInst->replaceAllUsesWith(Div); - UserInst->eraseFromParent(); - } - II->eraseFromParent(); - CurInstIterator = Div; - ModifiedDT = true; - return true; - } - - // Check if the flag is used to jump out to a 'trap' block - // If it's the case, we want to use this block directly when we create - // branches after comparing with 0 and comparing with -1 (signed case). - // We can do it only iff we can track all the uses of the flag, i.e. the - // only users are EXTRACTVALUE-insns, and their users are conditional - // branches, targeting the same 'trap' basic block. - BasicBlock *TrapBB = nullptr; - bool DoRelinkTrap = true; - for (User *FlagU: ResultsUsers[1]) { - for (User *U: FlagU->users()) { - BranchInst *TrapBranch = dyn_cast(U); - // If the user isn't a branch-insn, or it jumps to another BB, don't - // try to use TrapBB in the lowering. - if (!TrapBranch || (TrapBB && TrapBB != TrapBranch->getSuccessor(0))) { - DoRelinkTrap = false; - break; - } - TrapBB = TrapBranch->getSuccessor(0); - } - } - if (!TrapBB) - DoRelinkTrap = false; - // We want to reuse TrapBB if possible, because in that case we can avoid - // creating new basic blocks and thus overcomplicating the IR. However, if - // DIV instruction isn't well defined, we still need those blocks to model - // well-defined behaviour. Thus, we can't reuse TrapBB in this case. - if (!DivWellDefined) - DoRelinkTrap = false; - - Value *MinusOne = Constant::getAllOnesValue(LHS->getType()); - Value *Zero = Constant::getNullValue(LHS->getType()); - - // Split the original BB and create other basic blocks that will be used - // for checks. - BasicBlock *StartBB = II->getParent(); - BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(II)); - BasicBlock *NextBB = StartBB->splitBasicBlock(SplitPt, "div.end"); - - BasicBlock *DivByZeroBB; - if (!DoRelinkTrap) { - DivByZeroBB = BasicBlock::Create(II->getContext(), "div.divz", - NextBB->getParent(), NextBB); - BranchInst::Create(NextBB, DivByZeroBB); - } - BasicBlock *DivBB = BasicBlock::Create(II->getContext(), "div.div", - NextBB->getParent(), NextBB); - BranchInst::Create(NextBB, DivBB); - - // For signed variants, check the condition (2): - // LHS == SignedMinValue, RHS == -1. - Value *CmpMinusOne; - Value *CmpMinValue; - BasicBlock *ChkDivMinBB; - BasicBlock *DivMinBB; - Value *MinValue; - if (IsSigned) { - APInt SignedMinValue = - APInt::getSignedMinValue(LHS->getType()->getPrimitiveSizeInBits()); - MinValue = Constant::getIntegerValue(LHS->getType(), SignedMinValue); - ChkDivMinBB = BasicBlock::Create(II->getContext(), "div.chkdivmin", - NextBB->getParent(), NextBB); - BranchInst::Create(NextBB, ChkDivMinBB); - if (!DoRelinkTrap) { - DivMinBB = BasicBlock::Create(II->getContext(), "div.divmin", - NextBB->getParent(), NextBB); - BranchInst::Create(NextBB, DivMinBB); - } - CmpMinusOne = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, - RHS, MinusOne, "cmp.rhs.minus.one", - ChkDivMinBB->getTerminator()); - CmpMinValue = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, - LHS, MinValue, "cmp.lhs.signed.min", - ChkDivMinBB->getTerminator()); - BinaryOperator *CmpSignedOvf = BinaryOperator::Create(Instruction::And, - CmpMinusOne, - CmpMinValue); - // Here we're interested in the case when both %x is TMin and %y is -1. - // In this case the result will overflow. - // If that's not the case, we can perform usual division. These blocks - // will be inserted after DivByZero, so the division will be safe. - CmpSignedOvf->insertBefore(ChkDivMinBB->getTerminator()); - BranchInst::Create(DoRelinkTrap ? TrapBB : DivMinBB, DivBB, CmpSignedOvf, - ChkDivMinBB->getTerminator()); - ChkDivMinBB->getTerminator()->eraseFromParent(); - } - - // Check the condition (1): - // RHS == 0. - Value *CmpDivZero = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, - RHS, Zero, "cmp.rhs.zero", - StartBB->getTerminator()); - - // If RHS != 0, we want to check condition (2) in signed case, or proceed - // to usual division in unsigned case. - BranchInst::Create(DoRelinkTrap ? TrapBB : DivByZeroBB, - IsSigned ? ChkDivMinBB : DivBB, CmpDivZero, - StartBB->getTerminator()); - StartBB->getTerminator()->eraseFromParent(); - - // At the moment we have all the control flow created. We just need to - // insert DIV and PHI (if needed) to get the result value. - Instruction *DivRes, *FlagRes; - Instruction *InsPoint = nullptr; - if (ResultNeeded[0]) { - BinaryOperator *Div = BinaryOperator::Create(Op, LHS, RHS); - if (DivWellDefined) { - // The result value is the result of DIV operation placed right at the - // original place of the intrinsic. - Div->insertAfter(II); - DivRes = Div; - } else { - // The result is a PHI-node. - Div->insertBefore(DivBB->getTerminator()); - PHINode *DivResPN = - PHINode::Create(LHS->getType(), IsSigned ? 3 : 2, "div.res.phi", - NextBB->begin()); - DivResPN->addIncoming(Div, DivBB); - DivResPN->addIncoming(Zero, DivByZeroBB); - if (IsSigned) - DivResPN->addIncoming(MinValue, DivMinBB); - DivRes = DivResPN; - InsPoint = DivResPN; - } - } - - // Prepare a value for the second result (flag) if it is needed. - if (ResultNeeded[1] && !DoRelinkTrap) { - Type *FlagTy = II->getType()->getStructElementType(1); - PHINode *FlagResPN = - PHINode::Create(FlagTy, IsSigned ? 3 : 2, "div.flag.phi", - NextBB->begin()); - FlagResPN->addIncoming(Constant::getNullValue(FlagTy), DivBB); - FlagResPN->addIncoming(Constant::getAllOnesValue(FlagTy), DivByZeroBB); - if (IsSigned) - FlagResPN->addIncoming(Constant::getAllOnesValue(FlagTy), DivMinBB); - FlagRes = FlagResPN; - if (!InsPoint) - InsPoint = FlagRes; - } - - // If possible, propagate the results to the user. Otherwise, create alloca, - // and create a struct with the results on stack. - if (!BadCase) { - if (ResultNeeded[0]) { - for (User *U: ResultsUsers[0]) { - Instruction *UserInst = dyn_cast(U); - assert(UserInst && "Unexpected null-instruction"); - UserInst->replaceAllUsesWith(DivRes); - UserInst->eraseFromParent(); - } - } - if (ResultNeeded[1]) { - for (User *FlagU: ResultsUsers[1]) { - Instruction *FlagUInst = dyn_cast(FlagU); - if (DoRelinkTrap) { - // Replace - // %flag = extractvalue %intrinsic.res, 1 - // br i1 %flag, label %trap.bb, label %other.bb - // With - // br label %other.bb - // We've already created checks that are pointing to %trap.bb, there - // is no need to have the same checks here. - for (User *U: FlagUInst->users()) { - BranchInst *TrapBranch = dyn_cast(U); - BasicBlock *CurBB = TrapBranch->getParent(); - BasicBlock *SuccessorBB = TrapBranch->getSuccessor(1); - CurBB->getTerminator()->eraseFromParent(); - BranchInst::Create(SuccessorBB, CurBB); - } - } else { - FlagUInst->replaceAllUsesWith(FlagRes); - } - dyn_cast(FlagUInst)->eraseFromParent(); - } - } - } else { - // Create alloca, store our new values to it, and then load the final - // result from it. - Constant *Idx0 = ConstantInt::get(Type::getInt32Ty(II->getContext()), 0); - Constant *Idx1 = ConstantInt::get(Type::getInt32Ty(II->getContext()), 1); - Value *Idxs_DivRes[2] = {Idx0, Idx0}; - Value *Idxs_FlagRes[2] = {Idx0, Idx1}; - Value *NewRes = new llvm::AllocaInst(II->getType(), 0, "div.res.ptr", II); - Instruction *ResDivAddr = GetElementPtrInst::Create(NewRes, Idxs_DivRes); - Instruction *ResFlagAddr = - GetElementPtrInst::Create(NewRes, Idxs_FlagRes); - ResDivAddr->insertAfter(InsPoint); - ResFlagAddr->insertAfter(ResDivAddr); - StoreInst *StoreResDiv = new StoreInst(DivRes, ResDivAddr); - StoreInst *StoreResFlag = new StoreInst(FlagRes, ResFlagAddr); - StoreResDiv->insertAfter(ResFlagAddr); - StoreResFlag->insertAfter(StoreResDiv); - LoadInst *LoadRes = new LoadInst(NewRes, "div.res"); - LoadRes->insertAfter(StoreResFlag); - II->replaceAllUsesWith(LoadRes); - } - - II->eraseFromParent(); - CurInstIterator = StartBB->end(); - ModifiedDT = true; - return true; - } if (II && TLI) { SmallVector PtrOps; @@ -1183,8 +1071,11 @@ void ExtAddrMode::print(raw_ostream &OS) const { NeedPlus = true; } - if (BaseOffs) - OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + if (BaseOffs) { + OS << (NeedPlus ? " + " : "") + << BaseOffs; + NeedPlus = true; + } if (BaseReg) { OS << (NeedPlus ? " + " : "") @@ -1745,6 +1636,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { case Instruction::BitCast: + case Instruction::AddrSpaceCast: // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; @@ -2099,6 +1991,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: + case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). if ((AddrInst->getOperand(0)->getType()->isPointerTy() || @@ -2147,7 +2040,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::Shl: { // Can only handle X*C and X << C. ConstantInt *RHS = dyn_cast(AddrInst->getOperand(1)); - if (!RHS) return false; + if (!RHS) + return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) Scale = 1LL << Scale; @@ -2241,8 +2135,11 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return true; } case Instruction::SExt: { + Instruction *SExt = dyn_cast(AddrInst); + if (!SExt) + return false; + // Try to move this sext out of the way of the addressing mode. - Instruction *SExt = cast(AddrInst); // Ask for a method for doing so. TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( SExt, InsertedTruncs, TLI, PromotedInsts); @@ -2704,7 +2601,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && @@ -2712,7 +2609,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; @@ -2830,7 +2727,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); Value *Result = nullptr; @@ -2864,7 +2761,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the original IR value was tossed in favor of a constant back when // the AddrMode was created we need to bail out gracefully if widths // do not match instead of extending it. - Instruction *I = dyn_cast(Result); + Instruction *I = dyn_cast_or_null(Result); if (I && (Result != AddrMode.BaseReg)) I->eraseFromParent(); return false; @@ -3276,6 +3173,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { return false; } + BinaryOperator *BinOp = dyn_cast(I); + + if (BinOp && (BinOp->getOpcode() == Instruction::AShr || + BinOp->getOpcode() == Instruction::LShr)) { + ConstantInt *CI = dyn_cast(BinOp->getOperand(1)); + if (TLI && CI && TLI->hasExtractBitsInsn()) + return OptimizeExtractBits(BinOp, CI, *TLI); + + return false; + } + if (GetElementPtrInst *GEPI = dyn_cast(I)) { if (GEPI->hasAllZeroIndices()) { /// The GEP operand must be a pointer, so must its result -> BitCast @@ -3328,7 +3236,12 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { Instruction *Insn = BI; ++BI; DbgValueInst *DVI = dyn_cast(Insn); - if (!DVI) { + // Leave dbg.values that refer to an alloca alone. These + // instrinsics describe the address of a variable (= the alloca) + // being taken. They should not be moved next to the alloca + // (and to the beginning of the scope), but rather stay close to + // where said address is used. + if (!DVI || (DVI->getValue() && isa(DVI->getValue()))) { PrevNonDbgInst = Insn; continue; }