X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FCodeGenPrepare.cpp;h=948fddc6ed8d766bd7eeb473d399a22144cd5f9b;hb=25529b337f75a4b9b174592e2c95136e781bd824;hp=e81a9098ef5a3ae41b840baef2bfc9c0ba30d1ce;hpb=8048c44580056994eb0f2804e2914badc8fbef43;p=oota-llvm.git diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index e81a9098ef5..948fddc6ed8 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -13,32 +13,32 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "codegenprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/ValueMap.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" @@ -46,6 +46,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "codegenprepare" + STATISTIC(NumBlocksElim, "Number of blocks eliminated"); STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); @@ -60,6 +62,7 @@ STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); +STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); static cl::opt DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), @@ -69,6 +72,14 @@ static cl::opt DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); +static cl::opt AddrSinkUsingGEPs( + "addr-sink-using-gep", cl::Hidden, cl::init(false), + cl::desc("Address sinking in CGP using GEPs.")); + +static cl::opt EnableAndCmpSinking( + "enable-andcmp-sinking", cl::Hidden, cl::init(true), + cl::desc("Enable sinkinig and/cmp into branches.")); + namespace { typedef SmallPtrSet SetOfInstrs; typedef DenseMap InstrToOrigTy; @@ -106,15 +117,15 @@ typedef DenseMap InstrToOrigTy; public: static char ID; // Pass identification, replacement for typeid - explicit CodeGenPrepare(const TargetMachine *TM = 0) - : FunctionPass(ID), TM(TM), TLI(0) { + explicit CodeGenPrepare(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM), TLI(nullptr) { 
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - const char *getPassName() const { return "CodeGen Prepare"; } + const char *getPassName() const override { return "CodeGen Prepare"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); AU.addRequired(); } @@ -135,40 +146,34 @@ typedef DenseMap InstrToOrigTy; bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); bool DupRetToEnableTailCallOpts(BasicBlock *BB); bool PlaceDbgValues(Function &F); + bool sinkAndCmp(Function &F); }; } char CodeGenPrepare::ID = 0; -static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) { - initializeTargetLibraryInfoPass(Registry); - PassInfo *PI = new PassInfo( - "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID, - PassInfo::NormalCtor_t(callDefaultCtor), false, false, - PassInfo::TargetMachineCtor_t(callTargetMachineCtor)); - Registry.registerPass(*PI, true); - return PI; -} - -void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce) -} +INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { return new CodeGenPrepare(TM); } bool CodeGenPrepare::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + bool EverMadeChange = false; // Clear per function information. InsertedTruncsSet.clear(); PromotedInsts.clear(); ModifiedDT = false; - if (TM) TLI = TM->getTargetLowering(); + if (TM) + TLI = TM->getSubtargetImpl()->getTargetLowering(); TLInfo = &getAnalysis(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); @@ -190,6 +195,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // find a node corresponding to the value. EverMadeChange |= PlaceDbgValues(F); + // If there is a mask, compare against zero, and branch that can be combined + // into a single target instruction, push the mask and compare into branch + // users. Do this before OptimizeBlock -> OptimizeInst -> + // OptimizeCmpExpression, which perturbs the pattern being searched for. + if (!DisableBranchOpts) + EverMadeChange |= sinkAndCmp(F); + bool MadeChange = true; while (MadeChange) { MadeChange = false; @@ -253,7 +265,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool CodeGenPrepare::EliminateFallThrough(Function &F) { bool Changed = false; // Scan all of the blocks in the function, except for the entry block. - for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) { + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { BasicBlock *BB = I++; // If the destination block has a single pred, then this is a trivial // edge, just collapse it. @@ -289,7 +301,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { bool MadeChange = false; // Note that this intentionally skips the entry block. - for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) { + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { BasicBlock *BB = I++; // If this block doesn't end with an uncond branch, ignore it. 
@@ -335,16 +347,15 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, // don't mess around with them. BasicBlock::const_iterator BBI = BB->begin(); while (const PHINode *PN = dyn_cast(BBI++)) { - for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end(); - UI != E; ++UI) { - const Instruction *User = cast(*UI); - if (User->getParent() != DestBB || !isa(User)) + for (const User *U : PN->users()) { + const Instruction *UI = cast(U); + if (UI->getParent() != DestBB || !isa(UI)) return false; // If User is inside DestBB block and it is a PHINode then check // incoming value. If incoming value is not from BB then this is // a complex condition (e.g. preheaders) we want to avoid here. - if (User->getParent() == DestBB) { - if (const PHINode *UPN = dyn_cast(User)) + if (UI->getParent() == DestBB) { + if (const PHINode *UPN = dyn_cast(UI)) for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { Instruction *Insn = dyn_cast(UPN->getIncomingValue(I)); if (Insn && Insn->getParent() == BB && @@ -465,47 +476,15 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop -/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), -/// sink it into user blocks to reduce the number of virtual -/// registers that must be created and coalesced. -/// -/// Return true if any changes are made. -/// -static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ - // If this is a noop copy, - EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(CI->getType()); - - // This is an fp<->int conversion? - if (SrcVT.isInteger() != DstVT.isInteger()) - return false; - - // If this is an extension, it will be a zero or sign extension, which - // isn't a noop. - if (SrcVT.bitsLT(DstVT)) return false; - - // If these values will be promoted, find out what they will be promoted - // to. This helps us consider truncates on PPC as noop copies when they - // are. - if (TLI.getTypeAction(CI->getContext(), SrcVT) == - TargetLowering::TypePromoteInteger) - SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(CI->getContext(), DstVT) == - TargetLowering::TypePromoteInteger) - DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); - - // If, after promotion, these are the same types, this is a noop copy. - if (SrcVT != DstVT) - return false; - +/// SinkCast - Sink the specified cast instruction into its user blocks +static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCasts - Only insert a cast in each block once. DenseMap InsertedCasts; bool MadeChange = false; - for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); UI != E; ) { Use &TheUse = UI.getUse(); Instruction *User = cast(*UI); @@ -514,7 +493,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // appropriate predecessor block. BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast(User)) { - UserBB = PN->getIncomingBlock(UI); + UserBB = PN->getIncomingBlock(TheUse); } // Preincrement use iterator so we don't invalidate it. 
@@ -548,6 +527,43 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ return MadeChange; } +/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop +/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), +/// sink it into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. +/// +/// Return true if any changes are made. +/// +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ + // If this is a noop copy, + EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(CI->getType()); + + // This is an fp<->int conversion? + if (SrcVT.isInteger() != DstVT.isInteger()) + return false; + + // If this is an extension, it will be a zero or sign extension, which + // isn't a noop. + if (SrcVT.bitsLT(DstVT)) return false; + + // If these values will be promoted, find out what they will be promoted + // to. This helps us consider truncates on PPC as noop copies when they + // are. + if (TLI.getTypeAction(CI->getContext(), SrcVT) == + TargetLowering::TypePromoteInteger) + SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); + if (TLI.getTypeAction(CI->getContext(), DstVT) == + TargetLowering::TypePromoteInteger) + DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); + + // If, after promotion, these are the same types, this is a noop copy. + if (SrcVT != DstVT) + return false; + + return SinkCast(CI); +} + /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce /// the number of virtual registers that must be created and coalesced. This is /// a clear win except on targets with multiple condition code registers @@ -561,7 +577,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) { DenseMap InsertedCmps; bool MadeChange = false; - for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); UI != E; ) { Use &TheUse = UI.getUse(); Instruction *User = cast(*UI); @@ -603,14 +619,198 @@ static bool OptimizeCmpExpression(CmpInst *CI) { return MadeChange; } +/// isExtractBitsCandidateUse - Check if the candidates could +/// be combined with shift instruction, which includes: +/// 1. Truncate instruction +/// 2. And instruction and the imm is a mask of the low bits: +/// imm & (imm+1) == 0 +static bool isExtractBitsCandidateUse(Instruction *User) { + if (!isa(User)) { + if (User->getOpcode() != Instruction::And || + !isa(User->getOperand(1))) + return false; + + const APInt &Cimm = cast(User->getOperand(1))->getValue(); + + if ((Cimm & (Cimm + 1)).getBoolValue()) + return false; + } + return true; +} + +/// SinkShiftAndTruncate - sink both shift and truncate instruction +/// to the use of truncate's BB. +static bool +SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, + DenseMap &InsertedShifts, + const TargetLowering &TLI) { + BasicBlock *UserBB = User->getParent(); + DenseMap InsertedTruncs; + TruncInst *TruncI = dyn_cast(User); + bool MadeChange = false; + + for (Value::user_iterator TruncUI = TruncI->user_begin(), + TruncE = TruncI->user_end(); + TruncUI != TruncE;) { + + Use &TruncTheUse = TruncUI.getUse(); + Instruction *TruncUser = cast(*TruncUI); + // Preincrement use iterator so we don't invalidate it. 
+ + ++TruncUI; + + int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); + if (!ISDOpcode) + continue; + + // If the use is actually a legal node, there will not be an + // implicit truncate. + // FIXME: always querying the result type is just an + // approximation; some nodes' legality is determined by the + // operand or other means. There's no good way to find out though. + if (TLI.isOperationLegalOrCustom(ISDOpcode, + EVT::getEVT(TruncUser->getType(), true))) + continue; + + // Don't bother for PHI nodes. + if (isa(TruncUser)) + continue; + + BasicBlock *TruncUserBB = TruncUser->getParent(); + + if (UserBB == TruncUserBB) + continue; + + BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; + CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; + + if (!InsertedShift && !InsertedTrunc) { + BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + // Sink the shift + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + // Sink the trunc + BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); + TruncInsertPt++; + + InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, + TruncI->getType(), "", TruncInsertPt); + + MadeChange = true; + + TruncTheUse = InsertedTrunc; + } + } + return MadeChange; +} + +/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if +/// the uses could potentially be combined with this shift instruction and +/// generate BitExtract instruction. It will only be applied if the architecture +/// supports BitExtract instruction. Here is an example: +/// BB1: +/// %x.extract.shift = lshr i64 %arg1, 32 +/// BB2: +/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 +/// ==> +/// +/// BB2: +/// %x.extract.shift.1 = lshr i64 %arg1, 32 +/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 +/// +/// CodeGen will recoginze the pattern in BB2 and generate BitExtract +/// instruction. +/// Return true if any changes are made. +static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, + const TargetLowering &TLI) { + BasicBlock *DefBB = ShiftI->getParent(); + + /// Only insert instructions in each block once. + DenseMap InsertedShifts; + + bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); + + bool MadeChange = false; + for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); + UI != E;) { + Use &TheUse = UI.getUse(); + Instruction *User = cast(*UI); + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa(User)) + continue; + + if (!isExtractBitsCandidateUse(User)) + continue; + + BasicBlock *UserBB = User->getParent(); + + if (UserBB == DefBB) { + // If the shift and truncate instruction are in the same BB. The use of + // the truncate(TruncUse) may still introduce another truncate if not + // legal. In this case, we would like to sink both shift and truncate + // instruction to the BB of TruncUse. + // for example: + // BB1: + // i64 shift.result = lshr i64 opnd, imm + // trunc.result = trunc shift.result to i16 + // + // BB2: + // ----> We will have an implicit truncate here if the architecture does + // not have i16 compare. 
+ // cmp i16 trunc.result, opnd2 + // + if (isa(User) && shiftIsLegal + // If the type of the truncate is legal, no trucate will be + // introduced in other basic blocks. + && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) + MadeChange = + SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); + + continue; + } + // If we have already inserted a shift into this block, use it. + BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; + + if (!InsertedShift) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + MadeChange = true; + } + + // Replace a use of the shift with a use of the new shift. + TheUse = InsertedShift; + } + + // If we removed all uses, nuke the shift. + if (ShiftI->use_empty()) + ShiftI->eraseFromParent(); + + return MadeChange; +} + namespace { class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { protected: - void replaceCall(Value *With) { + void replaceCall(Value *With) override { CI->replaceAllUsesWith(With); CI->eraseFromParent(); } - bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { + bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override { if (ConstantInt *SizeCI = dyn_cast(CI->getArgOperand(SizeCIOp))) return SizeCI->isAllOnesValue(); @@ -651,8 +851,9 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // happens. WeakVH IterHandle(CurInstIterator); - replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0, - TLInfo, ModifiedDT ? 0 : DT); + replaceAndRecursivelySimplify(CI, RetVal, + TLI ? TLI->getDataLayout() : nullptr, + TLInfo, ModifiedDT ? nullptr : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -673,10 +874,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { } // From here on out we're working with named functions. - if (CI->getCalledFunction() == 0) return false; + if (!CI->getCalledFunction()) return false; // We'll need DataLayout from here on out. - const DataLayout *TD = TLI ? TLI->getDataLayout() : 0; + const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; if (!TD) return false; // Lower all default uses of _chk calls. This is very similar @@ -726,8 +927,8 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { if (!RI) return false; - PHINode *PN = 0; - BitCastInst *BCI = 0; + PHINode *PN = nullptr; + BitCastInst *BCI = nullptr; Value *V = RI->getReturnValue(); if (V) { BCI = dyn_cast(V); @@ -842,7 +1043,7 @@ namespace { struct ExtAddrMode : public TargetLowering::AddrMode { Value *BaseReg; Value *ScaledReg; - ExtAddrMode() : BaseReg(0), ScaledReg(0) {} + ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} void print(raw_ostream &OS) const; void dump() const; @@ -870,8 +1071,11 @@ void ExtAddrMode::print(raw_ostream &OS) const { NeedPlus = true; } - if (BaseOffs) - OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + if (BaseOffs) { + OS << (NeedPlus ? " + " : "") + << BaseOffs; + NeedPlus = true; + } if (BaseReg) { OS << (NeedPlus ? " + " : "") @@ -984,7 +1188,7 @@ class TypePromotionTransaction { } /// \brief Move the instruction back to its original position. 
- void undo() { + void undo() override { DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); Position.insert(Inst); } @@ -1009,7 +1213,7 @@ class TypePromotionTransaction { } /// \brief Restore the original value of the instruction. - void undo() { + void undo() override { DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" << "for: " << *Inst << "\n" << "with: " << *Origin << "\n"); @@ -1041,7 +1245,7 @@ class TypePromotionTransaction { } /// \brief Restore the original list of uses. - void undo() { + void undo() override { DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) Inst->setOperand(It, OriginalValues[It]); @@ -1064,7 +1268,7 @@ class TypePromotionTransaction { Instruction *getBuiltInstruction() { return Inst; } /// \brief Remove the built instruction. - void undo() { + void undo() override { DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n"); Inst->eraseFromParent(); } @@ -1087,7 +1291,7 @@ class TypePromotionTransaction { Instruction *getBuiltInstruction() { return Inst; } /// \brief Remove the built instruction. - void undo() { + void undo() override { DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n"); Inst->eraseFromParent(); } @@ -1108,7 +1312,7 @@ class TypePromotionTransaction { } /// \brief Mutate the instruction back to its original type. - void undo() { + void undo() override { DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy << "\n"); Inst->mutateType(OrigTy); @@ -1137,18 +1341,16 @@ class TypePromotionTransaction { DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New << "\n"); // Record the original uses. - for (Value::use_iterator UseIt = Inst->use_begin(), - EndIt = Inst->use_end(); - UseIt != EndIt; ++UseIt) { - Instruction *Use = cast(*UseIt); - OriginalUses.push_back(InstructionAndIdx(Use, UseIt.getOperandNo())); + for (Use &U : Inst->uses()) { + Instruction *UserI = cast(U.getUser()); + OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); } // Now, we can replace the uses. Inst->replaceAllUsesWith(New); } /// \brief Reassign the original uses of Inst to Inst. - void undo() { + void undo() override { DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); for (use_iterator UseIt = OriginalUses.begin(), EndIt = OriginalUses.end(); @@ -1171,10 +1373,10 @@ class TypePromotionTransaction { public: /// \brief Remove all reference of \p Inst and optinally replace all its /// uses with New. - /// \pre If !Inst->use_empty(), then New != NULL - InstructionRemover(Instruction *Inst, Value *New = NULL) + /// \pre If !Inst->use_empty(), then New != nullptr + InstructionRemover(Instruction *Inst, Value *New = nullptr) : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), - Replacer(NULL) { + Replacer(nullptr) { if (New) Replacer = new UsesReplacer(Inst, New); DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); @@ -1184,11 +1386,11 @@ class TypePromotionTransaction { ~InstructionRemover() { delete Replacer; } /// \brief Really remove the instruction. - void commit() { delete Inst; } + void commit() override { delete Inst; } /// \brief Resurrect the instruction and reassign it to the proper uses if /// new value was provided when build this action. - void undo() { + void undo() override { DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); Inserter.insert(Inst); if (Replacer) @@ -1214,7 +1416,7 @@ public: /// Same as Instruction::setOperand. 
void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); /// Same as Instruction::eraseFromParent. - void eraseInstruction(Instruction *Inst, Value *NewVal = NULL); + void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); /// Same as Value::replaceAllUsesWith. void replaceAllUsesWith(Instruction *Inst, Value *New); /// Same as Value::mutateType. @@ -1227,84 +1429,75 @@ public: void moveBefore(Instruction *Inst, Instruction *Before); /// @} - ~TypePromotionTransaction(); - private: /// The ordered list of actions made so far. - SmallVector Actions; - typedef SmallVectorImpl::iterator CommitPt; + SmallVector, 16> Actions; + typedef SmallVectorImpl>::iterator CommitPt; }; void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, Value *NewVal) { Actions.push_back( - new TypePromotionTransaction::OperandSetter(Inst, Idx, NewVal)); + make_unique(Inst, Idx, NewVal)); } void TypePromotionTransaction::eraseInstruction(Instruction *Inst, Value *NewVal) { Actions.push_back( - new TypePromotionTransaction::InstructionRemover(Inst, NewVal)); + make_unique(Inst, NewVal)); } void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, Value *New) { - Actions.push_back(new TypePromotionTransaction::UsesReplacer(Inst, New)); + Actions.push_back(make_unique(Inst, New)); } void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { - Actions.push_back(new TypePromotionTransaction::TypeMutator(Inst, NewTy)); + Actions.push_back(make_unique(Inst, NewTy)); } Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) { - TruncBuilder *TB = new TruncBuilder(Opnd, Ty); - Actions.push_back(TB); - return TB->getBuiltInstruction(); + std::unique_ptr Ptr(new TruncBuilder(Opnd, Ty)); + Instruction *I = Ptr->getBuiltInstruction(); + Actions.push_back(std::move(Ptr)); + return I; } Instruction *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd, Type *Ty) { - SExtBuilder *SB = new SExtBuilder(Inst, Opnd, Ty); - Actions.push_back(SB); - return SB->getBuiltInstruction(); + std::unique_ptr Ptr(new SExtBuilder(Inst, Opnd, Ty)); + Instruction *I = Ptr->getBuiltInstruction(); + Actions.push_back(std::move(Ptr)); + return I; } void TypePromotionTransaction::moveBefore(Instruction *Inst, Instruction *Before) { Actions.push_back( - new TypePromotionTransaction::InstructionMoveBefore(Inst, Before)); + make_unique(Inst, Before)); } TypePromotionTransaction::ConstRestorationPt TypePromotionTransaction::getRestorationPoint() const { - return Actions.rbegin() != Actions.rend() ? *Actions.rbegin() : NULL; + return !Actions.empty() ? Actions.back().get() : nullptr; } void TypePromotionTransaction::commit() { for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; - ++It) { + ++It) (*It)->commit(); - delete *It; - } Actions.clear(); } void TypePromotionTransaction::rollback( TypePromotionTransaction::ConstRestorationPt Point) { - while (!Actions.empty() && Point != (*Actions.rbegin())) { - TypePromotionAction *Curr = Actions.pop_back_val(); + while (!Actions.empty() && Point != Actions.back().get()) { + std::unique_ptr Curr = Actions.pop_back_val(); Curr->undo(); - delete Curr; } } -TypePromotionTransaction::~TypePromotionTransaction() { - for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; ++It) - delete *It; - Actions.clear(); -} - /// \brief A helper class for matching addressing modes. /// /// This encapsulates the logic for matching the target-legal addressing modes. 
@@ -1372,7 +1565,7 @@ private: bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); bool MatchAddr(Value *V, unsigned Depth); bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, - bool *MovedAway = NULL); + bool *MovedAway = nullptr); bool IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); @@ -1417,7 +1610,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now // to see if ScaleReg is actually X+C. If so, we can turn this into adding // X*Scale + C*Scale to addr mode. - ConstantInt *CI = 0; Value *AddLHS = 0; + ConstantInt *CI = nullptr; Value *AddLHS = nullptr; if (isa(ScaleReg) && // not a constant expr. match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { TestAddrMode.ScaledReg = AddLHS; @@ -1443,6 +1636,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { case Instruction::BitCast: + case Instruction::AddrSpaceCast: // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; @@ -1594,13 +1788,13 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( // get through. // If it, check we can get through. if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts)) - return NULL; + return nullptr; // Do not promote if the operand has been added by codegenprepare. // Otherwise, it means we are undoing an optimization that is likely to be // redone, thus causing potential infinite loop. if (isa(SExtOpnd) && InsertedTruncs.count(SExtOpnd)) - return NULL; + return nullptr; // SExt or Trunc instructions. // Return the related handler. @@ -1611,7 +1805,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( // Abort early if we will have to insert non-free instructions. if (!SExtOpnd->hasOneUse() && !TLI.isTruncateFree(SExtTy, SExtOpnd->getType())) - return NULL; + return nullptr; return promoteOperandForOther; } @@ -1722,7 +1916,7 @@ TypePromotionHelper::promoteOperandForOther(Instruction *SExt, TPT.moveBefore(SExtForOpnd, SExtOpnd); TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd); // If more sext are required, new instructions will have to be created. - SExtForOpnd = NULL; + SExtForOpnd = nullptr; } if (SExtForOpnd == SExt) { DEBUG(dbgs() << "Sign extension is useless now\n"); @@ -1756,7 +1950,12 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, Instruction *PromotedInst = dyn_cast(PromotedOperand); if (!PromotedInst) return false; - return TLI.isOperationLegalOrCustom(PromotedInst->getOpcode(), + int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); + // If the ISDOpcode is undefined, it was undefined before the promotion. + if (!ISDOpcode) + return true; + // Otherwise, check if the promoted instruction is legal or not. + return TLI.isOperationLegalOrCustom(ISDOpcode, EVT::getEVT(PromotedInst->getType())); } @@ -1792,6 +1991,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: + case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). 
if ((AddrInst->getOperand(0)->getType()->isPointerTy() || @@ -1840,7 +2040,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::Shl: { // Can only handle X*C and X << C. ConstantInt *RHS = dyn_cast(AddrInst->getOperand(1)); - if (!RHS) return false; + if (!RHS) + return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) Scale = 1LL << Scale; @@ -1934,8 +2135,11 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return true; } case Instruction::SExt: { + Instruction *SExt = dyn_cast(AddrInst); + if (!SExt) + return false; + // Try to move this sext out of the way of the addressing mode. - Instruction *SExt = cast(AddrInst); // Ask for a method for doing so. TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( SExt, InsertedTruncs, TLI, PromotedInsts); @@ -1999,11 +2203,11 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { AddrMode.BaseOffs -= CI->getSExtValue(); } else if (GlobalValue *GV = dyn_cast(Addr)) { // If this is a global variable, try to fold it into the addressing mode. - if (AddrMode.BaseGV == 0) { + if (!AddrMode.BaseGV) { AddrMode.BaseGV = GV; if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) return true; - AddrMode.BaseGV = 0; + AddrMode.BaseGV = nullptr; } } else if (Instruction *I = dyn_cast(Addr)) { ExtAddrMode BackupAddrMode = AddrMode; @@ -2048,7 +2252,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) return true; AddrMode.HasBaseReg = false; - AddrMode.BaseReg = 0; + AddrMode.BaseReg = nullptr; } // If the base register is already taken, see if we can do [r+r]. @@ -2058,7 +2262,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) return true; AddrMode.Scale = 0; - AddrMode.ScaledReg = 0; + AddrMode.ScaledReg = nullptr; } // Couldn't match. TPT.rollback(LastKnownGood); @@ -2104,23 +2308,22 @@ static bool FindAllMemoryUses(Instruction *I, return true; // Loop over all the uses, recursively processing them. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - User *U = *UI; + for (Use &U : I->uses()) { + Instruction *UserI = cast(U.getUser()); - if (LoadInst *LI = dyn_cast(U)) { - MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo())); + if (LoadInst *LI = dyn_cast(UserI)) { + MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); continue; } - if (StoreInst *SI = dyn_cast(U)) { - unsigned opNo = UI.getOperandNo(); + if (StoreInst *SI = dyn_cast(UserI)) { + unsigned opNo = U.getOperandNo(); if (opNo == 0) return true; // Storing addr, not into addr. MemoryUses.push_back(std::make_pair(SI, opNo)); continue; } - if (CallInst *CI = dyn_cast(U)) { + if (CallInst *CI = dyn_cast(UserI)) { InlineAsm *IA = dyn_cast(CI->getCalledValue()); if (!IA) return true; @@ -2130,8 +2333,7 @@ static bool FindAllMemoryUses(Instruction *I, continue; } - if (FindAllMemoryUses(cast(U), MemoryUses, ConsideredInsts, - TLI)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) return true; } @@ -2145,7 +2347,7 @@ static bool FindAllMemoryUses(Instruction *I, bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, Value *KnownLive2) { // If Val is either of the known-live values, we know it is live! 
- if (Val == 0 || Val == KnownLive1 || Val == KnownLive2) + if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) return true; // All values other than instructions and arguments (e.g. constants) are live. @@ -2204,13 +2406,13 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // If the BaseReg or ScaledReg was referenced by the previous addrmode, their // lifetime wasn't extended by adding this instruction. if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) - BaseReg = 0; + BaseReg = nullptr; if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) - ScaledReg = 0; + ScaledReg = nullptr; // If folding this instruction (and it's subexprs) didn't extend any live // ranges, we're ok with it. - if (BaseReg == 0 && ScaledReg == 0) + if (!BaseReg && !ScaledReg) return true; // If all uses of this instruction are ultimately load/store/inlineasm's, @@ -2299,7 +2501,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Use a worklist to iteratively look through PHI nodes, and ensure that // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. - Value *Consensus = 0; + Value *Consensus = nullptr; unsigned NumUsesConsensus = 0; bool IsNumUsesConsensusValid = false; SmallVector AddrModeInsts; @@ -2313,7 +2515,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Break use-def graph loops. if (!Visited.insert(V)) { - Consensus = 0; + Consensus = nullptr; break; } @@ -2359,7 +2561,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, continue; } - Consensus = 0; + Consensus = nullptr; break; } @@ -2399,14 +2601,135 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && + TM && TM->getSubtarget().useAA())) { + // By default, we use the GEP-based method when AA is used later. This + // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Value *ResultPtr = nullptr, *ResultIndex = nullptr; + + // First, find the pointer. + if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { + ResultPtr = AddrMode.BaseReg; + AddrMode.BaseReg = nullptr; + } + + if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { + // We can't add more than one pointer together, nor can we scale a + // pointer (both of which seem meaningless). + if (ResultPtr || AddrMode.Scale != 1) + return false; + + ResultPtr = AddrMode.ScaledReg; + AddrMode.Scale = 0; + } + + if (AddrMode.BaseGV) { + if (ResultPtr) + return false; + + ResultPtr = AddrMode.BaseGV; + } + + // If the real base value actually came from an inttoptr, then the matcher + // will look through it and provide only the integer value. In that case, + // use it here. 
+ if (!ResultPtr && AddrMode.BaseReg) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); + AddrMode.BaseReg = nullptr; + } else if (!ResultPtr && AddrMode.Scale == 1) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); + AddrMode.Scale = 0; + } + + if (!ResultPtr && + !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { + SunkAddr = Constant::getNullValue(Addr->getType()); + } else if (!ResultPtr) { + return false; + } else { + Type *I8PtrTy = + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + + ResultIndex = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (cast(IntPtrTy)->getBitWidth() < + cast(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + Instruction *I = dyn_cast_or_null(ResultIndex); + if (I && (ResultIndex != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; + } + + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (ResultIndex) + ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); + else + ResultIndex = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (ResultIndex) { + // We need to add this separately from the scale above to help with + // SDAG consecutive load/store merging. + if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + } + + ResultIndex = V; + } + + if (!ResultIndex) { + SunkAddr = ResultPtr; + } else { + if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + } + + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); - Value *Result = 0; + Value *Result = nullptr; // Start with the base register. 
Do this first so that subsequent address // matching finds it last, which will prevent it from trying to match it @@ -2433,7 +2756,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, cast(V->getType())->getBitWidth()) { V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr"); + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + Instruction *I = dyn_cast_or_null(Result); + if (I && (Result != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; } if (AddrMode.Scale != 1) V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), @@ -2462,7 +2793,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Result = V; } - if (Result == 0) + if (!Result) SunkAddr = Constant::getNullValue(Addr->getType()); else SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); @@ -2576,12 +2907,11 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { return false; bool DefIsLiveOut = false; - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - Instruction *User = cast(*UI); + for (User *U : I->users()) { + Instruction *UI = cast(U); // Figure out which BB this ext is used in. - BasicBlock *UserBB = User->getParent(); + BasicBlock *UserBB = UI->getParent(); if (UserBB == DefBB) continue; DefIsLiveOut = true; break; @@ -2590,14 +2920,13 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { return false; // Make sure none of the uses are PHI nodes. - for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end(); - UI != E; ++UI) { - Instruction *User = cast(*UI); - BasicBlock *UserBB = User->getParent(); + for (User *U : Src->users()) { + Instruction *UI = cast(U); + BasicBlock *UserBB = UI->getParent(); if (UserBB == DefBB) continue; // Be conservative. We don't want this xform to end up introducing // reloads just before load / store instructions. - if (isa(User) || isa(User) || isa(User)) + if (isa(UI) || isa(UI) || isa(UI)) return false; } @@ -2605,10 +2934,8 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { DenseMap InsertedTruncs; bool MadeChange = false; - for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end(); - UI != E; ++UI) { - Use &TheUse = UI.getUse(); - Instruction *User = cast(*UI); + for (Use &U : Src->uses()) { + Instruction *User = cast(U.getUser()); // Figure out which BB this ext is used in. BasicBlock *UserBB = User->getParent(); @@ -2624,7 +2951,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { } // Replace a use of the {s|z}ext source with a use of the result. 
- TheUse = InsertedTrunc; + U = InsertedTrunc; ++NumExtUses; MadeChange = true; } @@ -2720,8 +3047,7 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { return true; } - -bool isBroadcastShuffle(ShuffleVectorInst *SVI) { +static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { SmallVector Mask(SVI->getShuffleMask()); int SplatElem = -1; for (unsigned i = 0; i < Mask.size(); ++i) { @@ -2753,16 +3079,15 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { DenseMap InsertedShuffles; bool MadeChange = false; - for (Value::use_iterator UI = SVI->use_begin(), E = SVI->use_end(); - UI != E; ++UI) { - Instruction *User = cast(*UI); + for (User *U : SVI->users()) { + Instruction *UI = cast(U); // Figure out which BB this ext is used in. - BasicBlock *UserBB = User->getParent(); + BasicBlock *UserBB = UI->getParent(); if (UserBB == DefBB) continue; // For now only apply this when the splat is used by a shift instruction. - if (!User->isShift()) continue; + if (!UI->isShift()) continue; // Everything checks out, sink the shuffle if the user's block doesn't // already have a copy. @@ -2775,7 +3100,7 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { SVI->getOperand(2), "", InsertPt); } - User->replaceUsesOfWith(SVI, InsertedShuffle); + UI->replaceUsesOfWith(SVI, InsertedShuffle); MadeChange = true; } @@ -2793,7 +3118,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0, + if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, TLInfo, DT)) { P->replaceAllUsesWith(V); P->eraseFromParent(); @@ -2817,8 +3142,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { return true; if (isa(I) || isa(I)) { - bool MadeChange = MoveExtToFormExtLoad(I); - return MadeChange | OptimizeExtUses(I); + /// Sink a zext or sext into its user blocks if the target type doesn't + /// fit in one register + if (TLI && TLI->getTypeAction(CI->getContext(), + TLI->getValueType(CI->getType())) == + TargetLowering::TypeExpandInteger) { + return SinkCast(CI); + } else { + bool MadeChange = MoveExtToFormExtLoad(I); + return MadeChange | OptimizeExtUses(I); + } } return false; } @@ -2840,6 +3173,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { return false; } + BinaryOperator *BinOp = dyn_cast(I); + + if (BinOp && (BinOp->getOpcode() == Instruction::AShr || + BinOp->getOpcode() == Instruction::LShr)) { + ConstantInt *CI = dyn_cast(BinOp->getOperand(1)); + if (TLI && CI && TLI->hasExtractBitsInsn()) + return OptimizeExtractBits(BinOp, CI, *TLI); + + return false; + } + if (GetElementPtrInst *GEPI = dyn_cast(I)) { if (GEPI->hasAllZeroIndices()) { /// The GEP operand must be a pointer, so must its result -> BitCast @@ -2888,11 +3232,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { bool CodeGenPrepare::PlaceDbgValues(Function &F) { bool MadeChange = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - Instruction *PrevNonDbgInst = NULL; + Instruction *PrevNonDbgInst = nullptr; for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { Instruction *Insn = BI; ++BI; DbgValueInst *DVI = dyn_cast(Insn); - if (!DVI) { + // Leave dbg.values that refer to an alloca alone. 
These + // instrinsics describe the address of a variable (= the alloca) + // being taken. They should not be moved next to the alloca + // (and to the beginning of the scope), but rather stay close to + // where said address is used. + if (!DVI || (DVI->getValue() && isa(DVI->getValue()))) { PrevNonDbgInst = Insn; continue; } @@ -2912,3 +3261,70 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { } return MadeChange; } + +// If there is a sequence that branches based on comparing a single bit +// against zero that can be combined into a single instruction, and the +// target supports folding these into a single instruction, sink the +// mask and compare into the branch uses. Do this before OptimizeBlock -> +// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being +// searched for. +bool CodeGenPrepare::sinkAndCmp(Function &F) { + if (!EnableAndCmpSinking) + return false; + if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) + return false; + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // Does this BB end with the following? + // %andVal = and %val, #single-bit-set + // %icmpVal = icmp %andResult, 0 + // br i1 %cmpVal label %dest1, label %dest2" + BranchInst *Brcc = dyn_cast(BB->getTerminator()); + if (!Brcc || !Brcc->isConditional()) + continue; + ICmpInst *Cmp = dyn_cast(Brcc->getOperand(0)); + if (!Cmp || Cmp->getParent() != BB) + continue; + ConstantInt *Zero = dyn_cast(Cmp->getOperand(1)); + if (!Zero || !Zero->isZero()) + continue; + Instruction *And = dyn_cast(Cmp->getOperand(0)); + if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) + continue; + ConstantInt* Mask = dyn_cast(And->getOperand(1)); + if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) + continue; + DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); + + // Push the "and; icmp" for any users that are conditional branches. + // Since there can only be one branch use per BB, we don't need to keep + // track of which BBs we insert into. + for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); + UI != E; ) { + Use &TheUse = *UI; + // Find brcc use. + BranchInst *BrccUser = dyn_cast(*UI); + ++UI; + if (!BrccUser || !BrccUser->isConditional()) + continue; + BasicBlock *UserBB = BrccUser->getParent(); + if (UserBB == BB) continue; + DEBUG(dbgs() << "found Brcc use\n"); + + // Sink the "and; icmp" to use. + MadeChange = true; + BinaryOperator *NewAnd = + BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", + BrccUser); + CmpInst *NewCmp = + CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, + "", BrccUser); + TheUse = NewCmp; + ++NumAndCmpsMoved; + DEBUG(BrccUser->getParent()->dump()); + } + } + return MadeChange; +}
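
Editor's note, not part of the patch: a minimal LLVM IR sketch of the transformation the new sinkAndCmp() above performs, assuming a target whose TargetLowering reports isMaskAndBranchFoldingLegal() (e.g. a target with a test-bit-and-branch instruction). All value and block names below are invented for illustration; the pass itself creates the cloned instructions unnamed.

Before CodeGenPrepare, the single-bit mask and compare live in %entry, but a second conditional branch in %mid also uses the compare:

  define i64 @example(i64 %val, i64 %a, i64 %b) {
  entry:
    %masked = and i64 %val, 8            ; 8 has a single bit set, so the mask passes the isPowerOf2 check
    %iszero = icmp eq i64 %masked, 0
    br i1 %iszero, label %mid, label %other

  other:
    br label %mid

  mid:                                   ; second conditional branch on the same compare
    br i1 %iszero, label %use.a, label %use.b

  use.a:
    ret i64 %a

  use.b:
    ret i64 %b
  }

After sinkAndCmp(), %mid receives its own copy of the and/icmp immediately before its branch, so instruction selection can fold mask, compare and branch into one instruction in each block (the original pair in %entry is left in place and still feeds %entry's branch):

  mid:
    %masked.sunk = and i64 %val, 8
    %iszero.sunk = icmp eq i64 %masked.sunk, 0
    br i1 %iszero.sunk, label %use.a, label %use.b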