X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FCodeGenPrepare.cpp;h=948fddc6ed8d766bd7eeb473d399a22144cd5f9b;hb=3d6a30ea3b8272a46b1cd20684a09ea5e56c9548;hp=1ab24162dc8137c23facdb2b73af93a679da6591;hpb=9eb71e20aead0f5642068e7582cef994258a7d69;p=oota-llvm.git diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 1ab24162dc8..948fddc6ed8 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "codegenprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -47,6 +46,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "codegenprepare" + STATISTIC(NumBlocksElim, "Number of blocks eliminated"); STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); @@ -150,19 +151,8 @@ typedef DenseMap InstrToOrigTy; } char CodeGenPrepare::ID = 0; -static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) { - initializeTargetLibraryInfoPass(Registry); - PassInfo *PI = new PassInfo( - "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID, - PassInfo::NormalCtor_t(callDefaultCtor), false, false, - PassInfo::TargetMachineCtor_t(callTargetMachineCtor)); - Registry.registerPass(*PI, true); - return PI; -} - -void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce) -} +INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { return new CodeGenPrepare(TM); @@ -178,7 +168,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PromotedInsts.clear(); ModifiedDT = false; - if (TM) TLI = TM->getTargetLowering(); + if (TM) + TLI = TM->getSubtargetImpl()->getTargetLowering(); TLInfo = &getAnalysis(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); @@ -628,6 +619,190 @@ static bool OptimizeCmpExpression(CmpInst *CI) { return MadeChange; } +/// isExtractBitsCandidateUse - Check if the candidates could +/// be combined with shift instruction, which includes: +/// 1. Truncate instruction +/// 2. And instruction and the imm is a mask of the low bits: +/// imm & (imm+1) == 0 +static bool isExtractBitsCandidateUse(Instruction *User) { + if (!isa(User)) { + if (User->getOpcode() != Instruction::And || + !isa(User->getOperand(1))) + return false; + + const APInt &Cimm = cast(User->getOperand(1))->getValue(); + + if ((Cimm & (Cimm + 1)).getBoolValue()) + return false; + } + return true; +} + +/// SinkShiftAndTruncate - sink both shift and truncate instruction +/// to the use of truncate's BB. +static bool +SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, + DenseMap &InsertedShifts, + const TargetLowering &TLI) { + BasicBlock *UserBB = User->getParent(); + DenseMap InsertedTruncs; + TruncInst *TruncI = dyn_cast(User); + bool MadeChange = false; + + for (Value::user_iterator TruncUI = TruncI->user_begin(), + TruncE = TruncI->user_end(); + TruncUI != TruncE;) { + + Use &TruncTheUse = TruncUI.getUse(); + Instruction *TruncUser = cast(*TruncUI); + // Preincrement use iterator so we don't invalidate it. + + ++TruncUI; + + int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); + if (!ISDOpcode) + continue; + + // If the use is actually a legal node, there will not be an + // implicit truncate. + // FIXME: always querying the result type is just an + // approximation; some nodes' legality is determined by the + // operand or other means. There's no good way to find out though. + if (TLI.isOperationLegalOrCustom(ISDOpcode, + EVT::getEVT(TruncUser->getType(), true))) + continue; + + // Don't bother for PHI nodes. + if (isa(TruncUser)) + continue; + + BasicBlock *TruncUserBB = TruncUser->getParent(); + + if (UserBB == TruncUserBB) + continue; + + BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; + CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; + + if (!InsertedShift && !InsertedTrunc) { + BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + // Sink the shift + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + // Sink the trunc + BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); + TruncInsertPt++; + + InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, + TruncI->getType(), "", TruncInsertPt); + + MadeChange = true; + + TruncTheUse = InsertedTrunc; + } + } + return MadeChange; +} + +/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if +/// the uses could potentially be combined with this shift instruction and +/// generate BitExtract instruction. It will only be applied if the architecture +/// supports BitExtract instruction. Here is an example: +/// BB1: +/// %x.extract.shift = lshr i64 %arg1, 32 +/// BB2: +/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 +/// ==> +/// +/// BB2: +/// %x.extract.shift.1 = lshr i64 %arg1, 32 +/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 +/// +/// CodeGen will recoginze the pattern in BB2 and generate BitExtract +/// instruction. +/// Return true if any changes are made. +static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, + const TargetLowering &TLI) { + BasicBlock *DefBB = ShiftI->getParent(); + + /// Only insert instructions in each block once. + DenseMap InsertedShifts; + + bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); + + bool MadeChange = false; + for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); + UI != E;) { + Use &TheUse = UI.getUse(); + Instruction *User = cast(*UI); + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa(User)) + continue; + + if (!isExtractBitsCandidateUse(User)) + continue; + + BasicBlock *UserBB = User->getParent(); + + if (UserBB == DefBB) { + // If the shift and truncate instruction are in the same BB. The use of + // the truncate(TruncUse) may still introduce another truncate if not + // legal. In this case, we would like to sink both shift and truncate + // instruction to the BB of TruncUse. + // for example: + // BB1: + // i64 shift.result = lshr i64 opnd, imm + // trunc.result = trunc shift.result to i16 + // + // BB2: + // ----> We will have an implicit truncate here if the architecture does + // not have i16 compare. + // cmp i16 trunc.result, opnd2 + // + if (isa(User) && shiftIsLegal + // If the type of the truncate is legal, no trucate will be + // introduced in other basic blocks. + && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) + MadeChange = + SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); + + continue; + } + // If we have already inserted a shift into this block, use it. + BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; + + if (!InsertedShift) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + MadeChange = true; + } + + // Replace a use of the shift with a use of the new shift. + TheUse = InsertedShift; + } + + // If we removed all uses, nuke the shift. + if (ShiftI->use_empty()) + ShiftI->eraseFromParent(); + + return MadeChange; +} + namespace { class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { protected: @@ -896,8 +1071,11 @@ void ExtAddrMode::print(raw_ostream &OS) const { NeedPlus = true; } - if (BaseOffs) - OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + if (BaseOffs) { + OS << (NeedPlus ? " + " : "") + << BaseOffs; + NeedPlus = true; + } if (BaseReg) { OS << (NeedPlus ? " + " : "") @@ -1458,6 +1636,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { case Instruction::BitCast: + case Instruction::AddrSpaceCast: // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; @@ -1812,6 +1991,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: + case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). if ((AddrInst->getOperand(0)->getType()->isPointerTy() || @@ -1860,7 +2040,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::Shl: { // Can only handle X*C and X << C. ConstantInt *RHS = dyn_cast(AddrInst->getOperand(1)); - if (!RHS) return false; + if (!RHS) + return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) Scale = 1LL << Scale; @@ -1954,8 +2135,11 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return true; } case Instruction::SExt: { + Instruction *SExt = dyn_cast(AddrInst); + if (!SExt) + return false; + // Try to move this sext out of the way of the addressing mode. - Instruction *SExt = cast(AddrInst); // Ask for a method for doing so. TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( SExt, InsertedTruncs, TLI, PromotedInsts); @@ -2417,7 +2601,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && @@ -2425,7 +2609,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; @@ -2543,7 +2727,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); Value *Result = nullptr; @@ -2577,7 +2761,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the original IR value was tossed in favor of a constant back when // the AddrMode was created we need to bail out gracefully if widths // do not match instead of extending it. - Instruction *I = dyn_cast(Result); + Instruction *I = dyn_cast_or_null(Result); if (I && (Result != AddrMode.BaseReg)) I->eraseFromParent(); return false; @@ -2989,6 +3173,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { return false; } + BinaryOperator *BinOp = dyn_cast(I); + + if (BinOp && (BinOp->getOpcode() == Instruction::AShr || + BinOp->getOpcode() == Instruction::LShr)) { + ConstantInt *CI = dyn_cast(BinOp->getOperand(1)); + if (TLI && CI && TLI->hasExtractBitsInsn()) + return OptimizeExtractBits(BinOp, CI, *TLI); + + return false; + } + if (GetElementPtrInst *GEPI = dyn_cast(I)) { if (GEPI->hasAllZeroIndices()) { /// The GEP operand must be a pointer, so must its result -> BitCast @@ -3041,7 +3236,12 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { Instruction *Insn = BI; ++BI; DbgValueInst *DVI = dyn_cast(Insn); - if (!DVI) { + // Leave dbg.values that refer to an alloca alone. These + // instrinsics describe the address of a variable (= the alloca) + // being taken. They should not be moved next to the alloca + // (and to the beginning of the scope), but rather stay close to + // where said address is used. + if (!DVI || (DVI->getValue() && isa(DVI->getValue()))) { PrevNonDbgInst = Insn; continue; }