Summary:
Following the discussion on r241884, it's more reasonable to assume that a
target has no vector registers by default instead of letting every such
target overrides getNumberOfRegisters.
Therefore, this patch modifies BasicTTIImpl::getNumberOfRegisters to
return 0 when Vector is true, and partially reverts r241884 which
modifies NVPTXTTIImpl::getNumberOfRegisters.
It also fixes a performance bug in LoopVectorizer. Even if a target has
no vector registers, vectorization may still help ILP. So, we need both
checks to be false before disabling loop vectorization all together.
Reviewers: hfinkel
Subscribers: llvm-commits, jholewinski
Differential Revision: http://reviews.llvm.org/D11108
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241942
91177308-0d34-0410-b5e6-
96231b3b80d8
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector) { return 1; }
+ unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
unsigned getRegisterBitWidth(bool Vector) { return 32; }
Opd1PropInfo, Opd2PropInfo);
}
}
-
-unsigned NVPTXTTIImpl::getNumberOfRegisters(bool Vector) {
- if (Vector)
- return 0;
- return BaseT::getNumberOfRegisters(Vector);
-}
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
-
- unsigned getNumberOfRegisters(bool Vector);
};
} // end namespace llvm
const BranchProbability ColdProb(1, 5); // 20%
ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
- // If the target claims to have no vector registers don't attempt
- // vectorization.
- if (!TTI->getNumberOfRegisters(true))
+ // Don't attempt if
+ // 1. the target claims to have no vector registers, and
+ // 2. interleaving won't help ILP.
+ //
+ // The second condition is necessary because, even if the target has no
+ // vector registers, loop vectorization may still enable scalar
+ // interleaving.
+ if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
return false;
// Build up a worklist of inner-loops to vectorize. This is necessary as