X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDGPUTargetTransformInfo.cpp;h=88934b65876e4893694c0c13d0daea2dfadfcab8;hb=18ecf3fff3ce9790fdcc357e936475552545bc36;hp=51225eb7f881ac5739e8728c1b9311f40fa47102;hpb=436906ab3cbe991826a8967f2c3c14850d267cbf;p=oota-llvm.git diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index 51225eb7f88..88934b65876 100644 --- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "AMDGPUtti" #include "AMDGPU.h" #include "AMDGPUTargetMachine.h" #include "llvm/Analysis/LoopInfo.h" @@ -26,6 +25,8 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "AMDGPUtti" + // Declare the pass initialization routine locally as target-specific passes // don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. @@ -45,7 +46,7 @@ class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } @@ -55,9 +56,9 @@ public: initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() override { pushTTIStack(this); } + void initializePass() override { pushTTIStack(this); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -65,15 +66,22 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) override { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo *)this; return this; } - virtual bool hasBranchDivergence() const override; + bool hasBranchDivergence() const override; + + void getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const override; - virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override; + + unsigned getNumberOfRegisters(bool Vector) const override; + unsigned getRegisterBitWidth(bool Vector) const override; + unsigned getMaximumUnrollFactor() const override; /// @} }; @@ -93,14 +101,12 @@ bool AMDGPUTTI::hasBranchDivergence() const { return true; } void AMDGPUTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { - for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - BasicBlock *BB = *BI; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - const GetElementPtrInst *GEP = dyn_cast(I); - if (!GEP) + for (const BasicBlock *BB : L->getBlocks()) { + for (const Instruction &I : *BB) { + const GetElementPtrInst *GEP = dyn_cast(&I); + if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) continue; + const Value *Ptr = GEP->getPointerOperand(); const AllocaInst *Alloca = dyn_cast(GetUnderlyingObject(Ptr)); if (Alloca) { @@ -109,13 +115,39 @@ void AMDGPUTTI::getUnrollingPreferences(Loop *L, // require us to use indirect addressing, which is slow and prone to // compiler bugs. If this loop does an address calculation on an // alloca ptr, then we want to use a higher than normal loop unroll - // threshold. This will give SROA a better chance to eliminate these - // allocas. - // - // Don't use the maximum allowed value here as it will make some - // programs way too big. + // threshold. This will give SROA a better chance to eliminate these + // allocas. + // + // Don't use the maximum allowed value here as it will make some + // programs way too big. UP.Threshold = 500; } } } } + +AMDGPUTTI::PopcntSupportKind +AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software; +} + +unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const { + if (Vec) + return 0; + + // Number of VGPRs on SI. + if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 256; + + return 4 * 128; // XXX - 4 channels. Should these count as vector instead? +} + +unsigned AMDGPUTTI::getRegisterBitWidth(bool) const { + return 32; +} + +unsigned AMDGPUTTI::getMaximumUnrollFactor() const { + // Semi-arbitrary large amount. + return 64; +}