setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
/// operations would *not* be semantically equivalent.
static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
if (Cmp == Result)
- return true;
+ return (Cmp.getValueType() == MVT::f32 ||
+ Cmp.getValueType() == MVT::f64);
ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
EVT VT = TVal.getValueType();
-
- // Try to match this select into a max/min operation, which have dedicated
- // opcode in the instruction set.
- // FIXME: This is not correct in the presence of NaNs, so we only enable this
- // in no-NaNs mode.
- if (getTargetMachine().Options.NoNaNsFPMath) {
- SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
- if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
- selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
- CC = ISD::getSetCCSwappedOperands(CC);
- std::swap(MinMaxLHS, MinMaxRHS);
- }
-
- if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
- selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
- switch (CC) {
- default:
- break;
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETOGT:
- case ISD::SETOGE:
- return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
- break;
- case ISD::SETLT:
- case ISD::SETLE:
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
- break;
- }
- }
- }
-
- // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
- // and do the comparison.
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
+/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match FMIN/FMAX patterns.
+static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
+ // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
+ // Unless the NoNaNsFPMath option is set, be careful about NaNs:
+ // vmax/vmin return NaN if either operand is a NaN;
+ // only do the transformation when it matches that behavior.
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode;
+ bool IsReversed;
+ if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
+ selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
+ selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered = false, IsOrEqual;
+ switch (CC) {
+ default:
+ return SDValue();
+ case ISD::SETULT:
+ case ISD::SETULE:
+ IsUnordered = true;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
+ Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
+ break;
+
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ IsUnordered = true;
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
+ Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
+ break;
+ }
+
+ // If LHS is NaN, an ordered comparison will be false and the result will be
+ // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
+ // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ return SDValue();
+
+ // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
+ // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
+ // used for unsafe math or if one of the operands is known to be nonzero.
+ if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ return SDValue();
+
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
+ case ISD::SELECT_CC:
+ return performSelectCCCombine(N, DCI.DAG);
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
+; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
-define double @test_direct(float %in) #1 {
+define double @test_direct(float %in) {
; CHECK-LABEL: test_direct:
+; CHECK-SAFE-LABEL: test_direct:
%cmp = fcmp olt float %in, 0.000000e+00
%longer = fpext float %in to double
%val = select i1 %cmp, double 0.000000e+00, double %longer
ret double %val
; CHECK: fmax
+; CHECK-SAFE: fmax
}
-define double @test_cross(float %in) #1 {
+define double @test_cross(float %in) {
; CHECK-LABEL: test_cross:
+; CHECK-SAFE-LABEL: test_cross:
+ %cmp = fcmp ult float %in, 0.000000e+00
+ %longer = fpext float %in to double
+ %val = select i1 %cmp, double %longer, double 0.000000e+00
+ ret double %val
+
+; CHECK: fmin
+; CHECK-SAFE: fmin
+}
+
+; Same as previous, but with ordered comparison;
+; can't be converted in safe-math mode.
+define double @test_cross_fail_nan(float %in) {
+; CHECK-LABEL: test_cross_fail_nan:
+; CHECK-SAFE-LABEL: test_cross_fail_nan:
%cmp = fcmp olt float %in, 0.000000e+00
%longer = fpext float %in to double
%val = select i1 %cmp, double %longer, double 0.000000e+00
ret double %val
; CHECK: fmin
+; CHECK-SAFE: fcsel d0, d1, d0, mi
}
; This isn't a min or a max, but passes the first condition for swapping the
; results. Make sure they're put back before we resort to the normal fcsel.
define float @test_cross_fail(float %lhs, float %rhs) {
; CHECK-LABEL: test_cross_fail:
+; CHECK-SAFE-LABEL: test_cross_fail:
%tst = fcmp une float %lhs, %rhs
%res = select i1 %tst, float %rhs, float %lhs
ret float %res
; The register allocator would have to decide to be deliberately obtuse before
; other register were used.
; CHECK: fcsel s0, s1, s0, ne
-}
\ No newline at end of file
+; CHECK-SAFE: fcsel s0, s1, s0, ne
+}
+
+; Make sure the transformation isn't triggered for integers
+define i64 @test_integer(i64 %in) {
+ %cmp = icmp slt i64 %in, 0
+ %val = select i1 %cmp, i64 0, i64 %in
+ ret i64 %val
+}