//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ctrloops"
-
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "PPCTargetMachine.h"
-#include "PPC.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
using namespace llvm;
+#define DEBUG_TYPE "ctrloops"
+
#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
public:
static char ID;
- PPCCTRLoops() : FunctionPass(ID), TM(0) {
+ PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
}
private:
- // FIXME: Copied from LoopSimplify.
- BasicBlock *InsertPreheaderForLoop(Loop *L);
- void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L);
-
bool mightUseCTR(const Triple &TT, BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
+
private:
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
- DataLayout *TD;
+ const DataLayout *DL;
DominatorTree *DT;
const TargetLibraryInfo *LibInfo;
};
initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
MachineDominatorTree *MDT;
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
bool MadeChange = false;
return MadeChange;
}
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // True only if Ty is an IntegerType wider than 32 bits (Is32Bit) or 64 bits (otherwise).
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); // Strictly greater: a type exactly at the 32/64-bit limit is not "large".
+
+ return false; // Reached when Ty is not an IntegerType; callers in this file pass getScalarType() of the type.
+}
+
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
#endif
case Intrinsic::longjmp:
+
+ // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+ // because, although it does clobber the counter register, the
+ // control can't then return to inside the loop unless there is also
+ // an eh_sjlj_setjmp.
+ case Intrinsic::eh_sjlj_setjmp:
+
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::sin:
case Intrinsic::cos:
return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
}
}
default: return true;
case LibFunc::copysign:
case LibFunc::copysignf:
- case LibFunc::copysignl:
continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::copysignl:
+ return true;
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
case LibFunc::rintf:
case LibFunc::rintl:
Opcode = ISD::FRINT; break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ Opcode = ISD::FROUND; break;
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- (TT.isArch32Bit() &&
- (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
- CI->getDestTy()->getScalarType()->isIntegerTy(64))
- ))
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
return true;
+ } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::UDiv ||
+ J->getOpcode() == Instruction::SDiv ||
+ J->getOpcode() == Instruction::URem ||
+ J->getOpcode() == Instruction::SRem)) {
+ return true;
+ } else if (TT.isArch32Bit() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
} else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
// On PowerPC, indirect jumps use the counter register.
return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- if (TLI->supportJumpTables() &&
- SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
}
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- BasicBlock *CountedExitBlock = 0;
- const SCEV *ExitCount = 0;
- BranchInst *CountedExitBranch = 0;
- for (SmallVector<BasicBlock*, 4>::iterator I = ExitingBlocks.begin(),
+ BasicBlock *CountedExitBlock = nullptr;
+ const SCEV *ExitCount = nullptr;
+ BranchInst *CountedExitBranch = nullptr;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
IE = ExitingBlocks.end(); I != IE; ++I) {
const SCEV *EC = SE->getExitCount(L, *I);
DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
} else if (!SE->isLoopInvariant(EC, L))
continue;
+ if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ continue;
+
// We now have a loop-invariant count of loop iterations (which is not the
// constant zero) for which we know that this loop will not exit via this
// exiting block.
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
if (!Preheader || mightUseCTR(TT, Preheader))
- Preheader = InsertPreheaderForLoop(L);
+ Preheader = InsertPreheaderForLoop(L, this);
if (!Preheader)
return MadeChange;
return MadeChange;
}
-// FIXME: Copied from LoopSimplify.
-BasicBlock *PPCCTRLoops::InsertPreheaderForLoop(Loop *L) {
- BasicBlock *Header = L->getHeader();
-
- // Compute the set of predecessors of the loop that are not in the loop.
- SmallVector<BasicBlock*, 8> OutsideBlocks;
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (!L->contains(P)) { // Coming in from outside the loop?
- // If the loop is branched to from an indirect branch, we won't
- // be able to fully transform the loop, because it prohibits
- // edge splitting.
- if (isa<IndirectBrInst>(P->getTerminator())) return 0;
-
- // Keep track of it.
- OutsideBlocks.push_back(P);
- }
- }
-
- // Split out the loop pre-header.
- BasicBlock *PreheaderBB;
- if (!Header->isLandingPad()) {
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- this);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", this, NewBBs);
- PreheaderBB = NewBBs[0];
- }
-
- PreheaderBB->getTerminator()->setDebugLoc(
- Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "Creating pre-header "
- << PreheaderBB->getName() << "\n");
-
- // Make sure that NewBB is put someplace intelligent, which doesn't mess up
- // code layout too horribly.
- PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
-
- return PreheaderBB;
-}
-
-void PPCCTRLoops::PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L) {
- // Check to see if NewBB is already well placed.
- Function::iterator BBI = NewBB; --BBI;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- if (&*BBI == SplitPreds[i])
- return;
- }
-
- // If it isn't already after an outside block, move it after one. This is
- // always good as it makes the uncond branch from the outside block into a
- // fall-through.
-
- // Figure out *which* outside block to put this after. Prefer an outside
- // block that neighbors a BB actually in the loop.
- BasicBlock *FoundBB = 0;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
- L->contains(BBI)) {
- FoundBB = SplitPreds[i];
- break;
- }
- }
-
- // If our heuristic for a *good* bb to place this after doesn't find
- // anything, just pick something. It's likely better than leaving it within
- // the loop.
- if (!FoundBB)
- FoundBB = SplitPreds[0];
- NewBB->moveAfter(FoundBB);
-}
-
#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {