This fix was suggested as part of D11345 and is part of fixing PR24141.
With this change, we can avoid walking the uses of a divisor node if the target
doesn't want the combineRepeatedFPDivisors transform in the first place.
There is no NFC-intended other than that.
Differential Revision: http://reviews.llvm.org/D11531
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243498
91177308-0d34-0410-b5e6-
96231b3b80d8
return SDValue();
}
- /// Indicate whether this target prefers to combine the given number of FDIVs
- /// with the same divisor.
- virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const {
- return false;
+ /// Indicate whether this target prefers to combine FDIVs with the same
+ /// divisor. If the transform should never be done, return zero. If the
+ /// transform should be done, return the minimum number of divisor uses
+ /// that must exist.
+ virtual unsigned combineRepeatedFPDivisors() const {
+ return 0;
}
/// Hooks for building estimates in place of slower divisions and square
if (!DAG.getTarget().Options.UnsafeFPMath)
return SDValue();
+ // Skip if current node is a reciprocal.
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-
- // Skip if current node is a reciprocal.
if (N0CFP && N0CFP->isExactlyValue(1.0))
return SDValue();
+ // Exit early if the target does not want this transform or if there can't
+ // possibly be enough uses of the divisor to make the transform worthwhile.
SDValue N1 = N->getOperand(1);
- SmallVector<SDNode *, 4> Users;
+ unsigned MinUses = TLI.combineRepeatedFPDivisors();
+ if (!MinUses || N1->use_size() < MinUses)
+ return SDValue();
// Find all FDIV users of the same divisor.
+ SmallVector<SDNode *, 4> Users;
for (auto *U : N1->uses()) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
Users.push_back(U);
}
- if (!TLI.combineRepeatedFPDivisors(Users.size()))
+ // Now that we have the actual number of divisor uses, make sure it meets
+ // the minimum threshold specified by the target.
+ if (Users.size() < MinUses)
return SDValue();
EVT VT = N->getValueType(0);
return true;
}
-bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
- return NumUsers > 2;
+ return 3;
}
TargetLoweringBase::LegalizeTypeAction
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
return SDValue();
}
-bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
// Note: This functionality is used only when unsafe-fp-math is enabled, and
// on cores with reciprocal estimates (which are used when unsafe-fp-math is
// enabled for division), this functionality is redundant with the default
// one FP pipeline) for three or more FDIVs (for generic OOO cores).
switch (Subtarget.getDarwinDirective()) {
default:
- return NumUsers > 2;
+ return 3;
case PPC::DIR_440:
case PPC::DIR_A2:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
- return NumUsers > 1;
+ return 2;
}
}
bool &UseOneConstNR) const override;
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
-bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
- return NumUsers > 1;
+unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
+ return 2;
}
static bool isAllOnes(SDValue V) {
unsigned &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
};
namespace X86 {