// Helper declaration that appears before the `public:` label below; its
// definition is not part of this excerpt.
// NOTE(review): presumably a cost estimate for inserting/extracting the
// elements of Ty — confirm against the definition.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
// Default constructor. Null-initializes TM, ST and TLI and then always hits
// llvm_unreachable: as the message says, this pass cannot be directly
// constructed. The patch only swaps the literal 0 initializers for nullptr.
- AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
// Constructs the pass from a target machine. Stores TM, caches the subtarget
// returned by TM->getSubtargetImpl(), and calls initializeAMDGPUTTIPass with
// the registry returned by PassRegistry::getPassRegistry().
// TM is dereferenced unconditionally, so it must not be null.
// The patch changes where TLI comes from: it is now obtained through the
// subtarget instead of directly from the target machine.
AMDGPUTTI(const AMDGPUTargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
}
// Pass-initialization hook: calls pushTTIStack(this) and nothing else.
// The patch drops the redundant `virtual` keyword; `override` is kept.
- virtual void initializePass() override { pushTTIStack(this); }
+ void initializePass() override { pushTTIStack(this); }
// Forwards AU to TargetTransformInfo::getAnalysisUsage and adds no
// requirements of its own. The patch drops the redundant `virtual` keyword.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
///
/// When \p ID is the address of TargetTransformInfo::ID, returns this object
/// cast to TargetTransformInfo*; for any other ID, returns `this` unchanged.
/// The patch drops the redundant `virtual` keyword.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo *)this;
return this;
}
// Override declaration; the definition is not shown in this excerpt.
// The patch drops the redundant `virtual` keyword.
- virtual bool hasBranchDivergence() const override;
+ bool hasBranchDivergence() const override;
// Declaration of the loop-unrolling preference hook. The old line was marked
// `virtual` without `override`; the patch removes `virtual`, adds `override`,
// and rewraps the declaration across two lines.
- virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+ void getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const override;
+
// Override added by this patch: takes an integer bit width and returns a
// PopcntSupportKind.
+ PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
+
// Three overrides added by this patch: register count, register bit width
// (both take a `Vector` flag), and the maximum unroll factor.
+ unsigned getNumberOfRegisters(bool Vector) const override;
+ unsigned getRegisterBitWidth(bool Vector) const override;
+ unsigned getMaximumUnrollFactor() const override;
/// @}
};
/// Fills in \p UP with unrolling preferences for loop \p L.
///
/// After the patch: Threshold starts at 300, Count is set to UINT_MAX and
/// Partial is set to true. Every instruction in every block of the loop is
/// then scanned; whenever a GEP in the private address space has an alloca as
/// its underlying object, Threshold is raised to 800.
///
/// Before the patch: UP was only touched when such a GEP was found (with no
/// address-space filter), and the raised threshold was 500.
void AMDGPUTTI::getUnrollingPreferences(Loop *L,
UnrollingPreferences &UP) const {
// Removed by the patch: explicit-iterator walk over blocks and instructions.
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *BB = *BI;
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
- if (!GEP)
// Added by the patch: baseline preferences set before scanning the loop.
+ UP.Threshold = 300; // Twice the default.
+ UP.Count = UINT_MAX;
+ UP.Partial = true;
+
+ // TODO: Do we want runtime unrolling?
+
// Added by the patch: the same walk written as range-based for loops.
+ for (const BasicBlock *BB : L->getBlocks()) {
+ for (const Instruction &I : *BB) {
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
// Skip anything that is not a GEP into the private address space.
+ if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
+
// Only GEPs whose underlying object is an alloca raise the threshold.
const Value *Ptr = GEP->getPointerOperand();
const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
if (Alloca) {
//
// Don't use the maximum allowed value here as it will make some
// programs way too big.
// The scan does not stop at the first hit; later hits re-assign the same
// value.
- UP.Threshold = 500;
+ UP.Threshold = 800;
}
}
}
}
+
/// Reports popcount support for an integer of \p TyWidth bits:
/// PSK_FastHardware when the subtarget's hasBCNT(TyWidth) is true,
/// PSK_Software otherwise. Asserts that \p TyWidth is a power of two.
+AMDGPUTTI::PopcntSupportKind
+AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software;
+}
+
/// Returns the register count reported for this target.
///
/// Vector queries (\p Vec true) always get 0. Otherwise the result is 256
/// when the subtarget generation compares >= SOUTHERN_ISLANDS, and 4 * 128
/// for earlier generations.
+unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
+ if (Vec)
+ return 0;
+
+ // Number of VGPRs on SI.
+ if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 256;
+
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
/// Returns a register width of 32 bits. The bool argument is unnamed and
/// ignored, so the answer is the same for every query.
+unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
+ return 32;
+}
+
/// Returns a fixed maximum unroll factor of 64; it does not depend on the
/// subtarget.
+unsigned AMDGPUTTI::getMaximumUnrollFactor() const {
+ // Semi-arbitrary large amount.
+ return 64;
+}