if (TLI.combineRepeatedFPDivisors(Users.size())) {
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ // FIXME: This optimization requires some level of fast-math, so the
+ // created reciprocal node should at least have the 'allowReciprocal'
+ // fast-math-flag set.
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
// Dividend / Divisor -> Dividend * Reciprocal
if (Dividend != FPOne) {
SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
Reciprocal);
- DAG.ReplaceAllUsesWith(U, NewNode.getNode());
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
}
}
- return SDValue();
+ return SDValue(N, 0); // N was replaced.
}
}
ret float %div2
}
+; If the reciprocal is already calculated, we should not
+; generate an extra multiplication by 1.0.
+
+define double @div3_arcp(double %x, double %y, double %z) #0 {
+; CHECK-LABEL: div3_arcp:
+; CHECK: # BB#0:
+; CHECK-NEXT: movsd{{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: divsd %xmm1, %xmm2
+; CHECK-NEXT: mulsd %xmm2, %xmm0
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv fast double 1.0, %y
+ %div2 = fdiv fast double %x, %y
+ %ret = fadd fast double %div2, %div1
+ ret double %ret
+}
+
; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
attributes #0 = { "unsafe-fp-math"="true" }