//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "AMDGPUtti"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "AMDGPUtti"
+
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override { pushTTIStack(this); }
+ void initializePass() override { pushTTIStack(this); }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo *)this;
return this;
}
- virtual bool hasBranchDivergence() const override;
+ bool hasBranchDivergence() const override;
+
+ void getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const override;
- virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+ PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
+
+ unsigned getNumberOfRegisters(bool Vector) const override;
+ unsigned getRegisterBitWidth(bool Vector) const override;
+ unsigned getMaximumUnrollFactor() const override;
/// @}
};
void AMDGPUTTI::getUnrollingPreferences(Loop *L,
UnrollingPreferences &UP) const {
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *BB = *BI;
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
- if (!GEP)
+ for (const BasicBlock *BB : L->getBlocks()) {
+ for (const Instruction &I : *BB) {
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
+ if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
+
const Value *Ptr = GEP->getPointerOperand();
const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
if (Alloca) {
}
}
}
+
+AMDGPUTTI::PopcntSupportKind
+AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software;
+}
+
+unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
+ if (Vec)
+ return 0;
+
+ // Number of VGPRs on SI.
+ if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 256;
+
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
+ return 32;
+}
+
+unsigned AMDGPUTTI::getMaximumUnrollFactor() const {
+ // Semi-arbitrary large amount.
+ return 64;
+}