//===---------------------------------------------------------------------===//
-'select' on vectors and scalars could be a whole lot better. We currently
-lower them to conditional branches. On x86-64 for example, we compile this:
-
-double test(double a, double b, double c, double d) { return a<b ? c : d; }
-
-to:
-
-_test:
- ucomisd %xmm0, %xmm1
- ja LBB1_2 # entry
-LBB1_1: # entry
- movapd %xmm3, %xmm2
-LBB1_2: # entry
- movapd %xmm2, %xmm0
- ret
-
-instead of:
-
-_test:
- cmpltsd %xmm1, %xmm0
- andpd %xmm0, %xmm2
- andnpd %xmm3, %xmm0
- orpd %xmm2, %xmm0
- ret
-
-For unpredictable branches, the later is much more efficient. This should
-just be a matter of having scalar sse map to SELECT_CC and custom expanding
-or iseling it.
-
-//===---------------------------------------------------------------------===//
-
LLVM currently generates stack realignment code, when it is not necessary
needed. The problem is that we need to know about stack alignment too early,
before RA runs.
return SDValue();
}
+/// \brief Map an ISD::CondCode to an SSE FP compare-mask predicate (0-7), or 8
+/// for SETUEQ/SETONE. Swaps \p Op0 and \p Op1 in place when the mapping needs it.
+static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
+ SDValue &Op1) {
+ unsigned SSECC;
+ bool Swap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD (8 is not listed: it flags SETUEQ/SETONE for the caller)
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETOGT:
+ case ISD::SETGT: Swap = true; // Fallthrough: a > b is tested as b < a
+ case ISD::SETLT:
+ case ISD::SETOLT: SSECC = 1; break;
+ case ISD::SETOGE:
+ case ISD::SETGE: Swap = true; // Fallthrough: a >= b is tested as b <= a
+ case ISD::SETLE:
+ case ISD::SETOLE: SSECC = 2; break;
+ case ISD::SETUO: SSECC = 3; break;
+ case ISD::SETUNE:
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETULE: Swap = true; // Fallthrough: a ule b is tested as !(b < a)
+ case ISD::SETUGE: SSECC = 5; break;
+ case ISD::SETULT: Swap = true; // Fallthrough: a ult b is tested as !(b <= a)
+ case ISD::SETUGT: SSECC = 6; break;
+ case ISD::SETO: SSECC = 7; break;
+ case ISD::SETUEQ:
+ case ISD::SETONE: SSECC = 8; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ return SSECC;
+}
+
// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
// ones, and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
- unsigned SSECC;
- bool Swap = false;
-
- // SSE Condition code mapping:
- // 0 - EQ
- // 1 - LT
- // 2 - LE
- // 3 - UNORD
- // 4 - NEQ
- // 5 - NLT
- // 6 - NLE
- // 7 - ORD
- switch (SetCCOpcode) {
- default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETOEQ:
- case ISD::SETEQ: SSECC = 0; break;
- case ISD::SETOGT:
- case ISD::SETGT: Swap = true; // Fallthrough
- case ISD::SETLT:
- case ISD::SETOLT: SSECC = 1; break;
- case ISD::SETOGE:
- case ISD::SETGE: Swap = true; // Fallthrough
- case ISD::SETLE:
- case ISD::SETOLE: SSECC = 2; break;
- case ISD::SETUO: SSECC = 3; break;
- case ISD::SETUNE:
- case ISD::SETNE: SSECC = 4; break;
- case ISD::SETULE: Swap = true; // Fallthrough
- case ISD::SETUGE: SSECC = 5; break;
- case ISD::SETULT: Swap = true; // Fallthrough
- case ISD::SETUGT: SSECC = 6; break;
- case ISD::SETO: SSECC = 7; break;
- case ISD::SETUEQ:
- case ISD::SETONE: SSECC = 8; break;
- }
- if (Swap)
- std::swap(Op0, Op1);
+ unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
+ EVT VT = Op1.getValueType();
SDValue CC;
+ // Lower fp selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
+ // are available. Otherwise fp cmovs get lowered into a less efficient branch
+ // sequence later on.
+ if (Cond.getOpcode() == ISD::SETCC &&
+ ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
+ (Subtarget->hasSSE1() && VT == MVT::f32)) &&
+ VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) {
+ SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
+ int SSECC = translateX86FSETCC(
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
+
+ if (SSECC != 8) {
+ unsigned Opcode = VT == MVT::f32 ? X86ISD::FSETCCss : X86ISD::FSETCCsd;
+ SDValue Cmp = DAG.getNode(Opcode, DL, VT, CondOp0, CondOp1,
+ DAG.getConstant(SSECC, MVT::i8));
+ SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
+ SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
+ return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
+ }
+ }
+
if (Cond.getOpcode() == ISD::SETCC) {
SDValue NewCond = LowerSETCC(Cond, DAG);
if (NewCond.getNode())
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FANDN: return "X86ISD::FANDN";
case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FSRL: return "X86ISD::FSRL";
return SDValue();
}
+/// PerformFANDNCombine - Fold X86ISD::FANDN nodes that have a +0.0 constant
+/// operand. Returns the replacement value, or an empty SDValue when neither
+/// fold applies.
+static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue Inverted = N->getOperand(0);
+  SDValue Kept = N->getOperand(1);
+
+  // FANDN(0.0, x) -> x: the first operand is the one that gets complemented,
+  // so an all-zero-bits first operand passes the second one through.
+  ConstantFPSDNode *InvertedC = dyn_cast<ConstantFPSDNode>(Inverted);
+  if (InvertedC && InvertedC->getValueAPF().isPosZero())
+    return Kept;
+
+  // FANDN(x, 0.0) -> 0.0: the second operand already is the +0.0 result.
+  ConstantFPSDNode *KeptC = dyn_cast<ConstantFPSDNode>(Kept);
+  if (KeptC && KeptC->getValueAPF().isPosZero())
+    return Kept;
+
+  return SDValue();
+}
+
static SDValue PerformBTCombine(SDNode *N,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
+ case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
/// to X86::XORPS or X86::XORPD.
FXOR,
+ /// FANDN - Bitwise logical ANDNOT of floating point values. This
+ /// corresponds to X86::ANDNPS or X86::ANDNPD.
+ FANDN,
+
/// FSRL - Bitwise logical right shift of floating point values. These
/// corresponds to X86::PSRLDQ.
FSRL,
[SDNPCommutative, SDNPAssociative]>;
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
+// ANDN computes (~LHS & RHS), so operand order is significant. Unlike X86fxor,
+// this node must not be marked commutative or associative, otherwise
+// instruction selection is free to swap or regroup its operands.
+def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
SSE_BIT_ITINS_P>;
-let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
- defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef,
+let isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
SSE_BIT_ITINS_P>;
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
; and negative infinity, because those are more efficient on x86.
+declare void @f() nounwind
+
; CHECK-LABEL: oeq_inff:
; CHECK: ucomiss
; CHECK: jb
-define float @oeq_inff(float %x, float %y) nounwind readonly {
+define void @oeq_inff(float %x) nounwind {
%t0 = fcmp oeq float %x, 0x7FF0000000000000
- %t1 = select i1 %t0, float 1.0, float %y
- ret float %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
; CHECK-LABEL: oeq_inf:
; CHECK: ucomisd
; CHECK: jb
-define double @oeq_inf(double %x, double %y) nounwind readonly {
+define void @oeq_inf(double %x) nounwind {
%t0 = fcmp oeq double %x, 0x7FF0000000000000
- %t1 = select i1 %t0, double 1.0, double %y
- ret double %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
; CHECK-LABEL: une_inff:
; CHECK: ucomiss
; CHECK: jae
-define float @une_inff(float %x, float %y) nounwind readonly {
+define void @une_inff(float %x) nounwind {
%t0 = fcmp une float %x, 0x7FF0000000000000
- %t1 = select i1 %t0, float 1.0, float %y
- ret float %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
; CHECK-LABEL: une_inf:
; CHECK: ucomisd
; CHECK: jae
-define double @une_inf(double %x, double %y) nounwind readonly {
+define void @une_inf(double %x) nounwind {
%t0 = fcmp une double %x, 0x7FF0000000000000
- %t1 = select i1 %t0, double 1.0, double %y
- ret double %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
; CHECK-LABEL: oeq_neg_inff:
; CHECK: ucomiss
; CHECK: jb
-define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
+define void @oeq_neg_inff(float %x) nounwind {
%t0 = fcmp oeq float %x, 0xFFF0000000000000
- %t1 = select i1 %t0, float 1.0, float %y
- ret float %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
; CHECK-LABEL: oeq_neg_inf:
; CHECK: ucomisd
; CHECK: jb
-define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
+define void @oeq_neg_inf(double %x) nounwind {
%t0 = fcmp oeq double %x, 0xFFF0000000000000
- %t1 = select i1 %t0, double 1.0, double %y
- ret double %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
; CHECK-LABEL: une_neg_inff:
; CHECK: ucomiss
; CHECK: jae
-define float @une_neg_inff(float %x, float %y) nounwind readonly {
+define void @une_neg_inff(float %x) nounwind {
%t0 = fcmp une float %x, 0xFFF0000000000000
- %t1 = select i1 %t0, float 1.0, float %y
- ret float %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
; CHECK-LABEL: une_neg_inf:
; CHECK: ucomisd
; CHECK: jae
-define double @une_neg_inf(double %x, double %y) nounwind readonly {
+define void @une_neg_inf(double %x) nounwind {
%t0 = fcmp une double %x, 0xFFF0000000000000
- %t1 = select i1 %t0, double 1.0, double %y
- ret double %t1
+ br i1 %t0, label %true, label %false
+
+true:
+ call void @f() nounwind
+ br label %false
+
+false:
+ ret void
}
--- /dev/null
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s
+
+; (a < eps) ? b : 0.0 on double -- expects the compare mask to be ANDed with b.
+define double @test1(double %a, double %b, double %eps) {
+ %cmp = fcmp olt double %a, %eps
+ %cond = select i1 %cmp, double %b, double 0.000000e+00
+ ret double %cond
+
+; CHECK-LABEL: @test1
+; CHECK: cmpltsd %xmm2, %xmm0
+; CHECK-NEXT: andpd %xmm1, %xmm0
+}
+
+; (a <= eps) ? b : 0.0 on double -- expects the compare mask to be ANDed with b.
+define double @test2(double %a, double %b, double %eps) {
+ %cmp = fcmp ole double %a, %eps
+ %cond = select i1 %cmp, double %b, double 0.000000e+00
+ ret double %cond
+
+; CHECK-LABEL: @test2
+; CHECK: cmplesd %xmm2, %xmm0
+; CHECK-NEXT: andpd %xmm1, %xmm0
+}
+
+; (a > eps) ? b : 0.0 on double -- expects "eps < a" (operands swapped), then AND.
+define double @test3(double %a, double %b, double %eps) {
+ %cmp = fcmp ogt double %a, %eps
+ %cond = select i1 %cmp, double %b, double 0.000000e+00
+ ret double %cond
+
+; CHECK-LABEL: @test3
+; CHECK: cmpltsd %xmm0, %xmm2
+; CHECK-NEXT: andpd %xmm1, %xmm2
+}
+
+; (a >= eps) ? b : 0.0 on double -- expects "eps <= a" (operands swapped), then AND.
+define double @test4(double %a, double %b, double %eps) {
+ %cmp = fcmp oge double %a, %eps
+ %cond = select i1 %cmp, double %b, double 0.000000e+00
+ ret double %cond
+
+; CHECK-LABEL: @test4
+; CHECK: cmplesd %xmm0, %xmm2
+; CHECK-NEXT: andpd %xmm1, %xmm2
+}
+
+; (a < eps) ? 0.0 : b on double -- expects the inverted compare mask ANDed with b.
+define double @test5(double %a, double %b, double %eps) {
+ %cmp = fcmp olt double %a, %eps
+ %cond = select i1 %cmp, double 0.000000e+00, double %b
+ ret double %cond
+
+; CHECK-LABEL: @test5
+; CHECK: cmpltsd %xmm2, %xmm0
+; CHECK-NEXT: andnpd %xmm1, %xmm0
+}
+
+; (a <= eps) ? 0.0 : b on double -- expects the inverted compare mask ANDed with b.
+define double @test6(double %a, double %b, double %eps) {
+ %cmp = fcmp ole double %a, %eps
+ %cond = select i1 %cmp, double 0.000000e+00, double %b
+ ret double %cond
+
+; CHECK-LABEL: @test6
+; CHECK: cmplesd %xmm2, %xmm0
+; CHECK-NEXT: andnpd %xmm1, %xmm0
+}
+
+; (a > eps) ? 0.0 : b on double -- expects "eps < a" (operands swapped), then ANDN.
+define double @test7(double %a, double %b, double %eps) {
+ %cmp = fcmp ogt double %a, %eps
+ %cond = select i1 %cmp, double 0.000000e+00, double %b
+ ret double %cond
+
+; CHECK-LABEL: @test7
+; CHECK: cmpltsd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm1, %xmm2
+}
+
+; (a >= eps) ? 0.0 : b on double -- expects "eps <= a" (operands swapped), then ANDN.
+define double @test8(double %a, double %b, double %eps) {
+ %cmp = fcmp oge double %a, %eps
+ %cond = select i1 %cmp, double 0.000000e+00, double %b
+ ret double %cond
+
+; CHECK-LABEL: @test8
+; CHECK: cmplesd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm1, %xmm2
+}
+
+; (a < eps) ? b : 0.0 on float -- expects the compare mask to be ANDed with b.
+define float @test9(float %a, float %b, float %eps) {
+ %cmp = fcmp olt float %a, %eps
+ %cond = select i1 %cmp, float %b, float 0.000000e+00
+ ret float %cond
+
+; CHECK-LABEL: @test9
+; CHECK: cmpltss %xmm2, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm0
+}
+
+; (a <= eps) ? b : 0.0 on float -- expects the compare mask to be ANDed with b.
+define float @test10(float %a, float %b, float %eps) {
+ %cmp = fcmp ole float %a, %eps
+ %cond = select i1 %cmp, float %b, float 0.000000e+00
+ ret float %cond
+
+; CHECK-LABEL: @test10
+; CHECK: cmpless %xmm2, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm0
+}
+
+; (a > eps) ? b : 0.0 on float -- expects "eps < a" (operands swapped), then AND.
+define float @test11(float %a, float %b, float %eps) {
+ %cmp = fcmp ogt float %a, %eps
+ %cond = select i1 %cmp, float %b, float 0.000000e+00
+ ret float %cond
+
+; CHECK-LABEL: @test11
+; CHECK: cmpltss %xmm0, %xmm2
+; CHECK-NEXT: andps %xmm1, %xmm2
+}
+
+; (a >= eps) ? b : 0.0 on float -- expects "eps <= a" (operands swapped), then AND.
+define float @test12(float %a, float %b, float %eps) {
+ %cmp = fcmp oge float %a, %eps
+ %cond = select i1 %cmp, float %b, float 0.000000e+00
+ ret float %cond
+
+; CHECK-LABEL: @test12
+; CHECK: cmpless %xmm0, %xmm2
+; CHECK-NEXT: andps %xmm1, %xmm2
+}
+
+; (a < eps) ? 0.0 : b on float -- expects the inverted compare mask ANDed with b.
+define float @test13(float %a, float %b, float %eps) {
+ %cmp = fcmp olt float %a, %eps
+ %cond = select i1 %cmp, float 0.000000e+00, float %b
+ ret float %cond
+
+; CHECK-LABEL: @test13
+; CHECK: cmpltss %xmm2, %xmm0
+; CHECK-NEXT: andnps %xmm1, %xmm0
+}
+
+; (a <= eps) ? 0.0 : b on float -- expects the inverted compare mask ANDed with b.
+define float @test14(float %a, float %b, float %eps) {
+ %cmp = fcmp ole float %a, %eps
+ %cond = select i1 %cmp, float 0.000000e+00, float %b
+ ret float %cond
+
+; CHECK-LABEL: @test14
+; CHECK: cmpless %xmm2, %xmm0
+; CHECK-NEXT: andnps %xmm1, %xmm0
+}
+
+; (a > eps) ? 0.0 : b on float -- expects "eps < a" (operands swapped), then ANDN.
+define float @test15(float %a, float %b, float %eps) {
+ %cmp = fcmp ogt float %a, %eps
+ %cond = select i1 %cmp, float 0.000000e+00, float %b
+ ret float %cond
+
+; CHECK-LABEL: @test15
+; CHECK: cmpltss %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm1, %xmm2
+}
+
+; (a >= eps) ? 0.0 : b on float -- expects "eps <= a" (operands swapped), then ANDN.
+define float @test16(float %a, float %b, float %eps) {
+ %cmp = fcmp oge float %a, %eps
+ %cond = select i1 %cmp, float 0.000000e+00, float %b
+ ret float %cond
+
+; CHECK-LABEL: @test16
+; CHECK: cmpless %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm1, %xmm2
+}
+
+; (a >= eps) ? c : b on float with two variable arms -- expects the full
+; compare / AND / ANDN / OR sequence.
+define float @test17(float %a, float %b, float %c, float %eps) {
+ %cmp = fcmp oge float %a, %eps
+ %cond = select i1 %cmp, float %c, float %b
+ ret float %cond
+
+; CHECK-LABEL: @test17
+; CHECK: cmpless %xmm0, %xmm3
+; CHECK-NEXT: andps %xmm3, %xmm2
+; CHECK-NEXT: andnps %xmm1, %xmm3
+; CHECK-NEXT: orps %xmm2, %xmm3
+}
+
+; (a >= eps) ? c : b on double with two variable arms -- expects the full
+; compare / AND / ANDN / OR sequence.
+define double @test18(double %a, double %b, double %c, double %eps) {
+ %cmp = fcmp oge double %a, %eps
+ %cond = select i1 %cmp, double %c, double %b
+ ret double %cond
+
+; CHECK-LABEL: @test18
+; CHECK: cmplesd %xmm0, %xmm3
+; CHECK-NEXT: andpd %xmm3, %xmm2
+; CHECK-NEXT: andnpd %xmm1, %xmm3
+; CHECK-NEXT: orpd %xmm2, %xmm3
+}
}
; CHECK-LABEL: oge:
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ole:
-; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; CHECK: cmplesd %xmm1
; UNSAFE-LABEL: ole:
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; FINITE-LABEL: ole:
}
; CHECK-LABEL: oge_inverse:
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge_inverse:
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ole_inverse:
-; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; CHECK: cmplesd %xmm1
; UNSAFE-LABEL: ole_inverse:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: oge_x:
-; CHECK: ucomisd %xmm1, %xmm0
+; CHECK: cmplesd %xmm
+; CHECK-NEXT: andpd
; UNSAFE-LABEL: oge_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
}
; CHECK-LABEL: ole_x:
-; CHECK: ucomisd %xmm0, %xmm1
+; CHECK: cmplesd %xmm
+; CHECK-NEXT: andpd
; UNSAFE-LABEL: ole_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
}
; CHECK-LABEL: oge_inverse_x:
-; CHECK: ucomisd %xmm
+; CHECK: cmplesd %xmm
+; CHECK-NEXT: andnpd
; UNSAFE-LABEL: oge_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
}
; CHECK-LABEL: ole_inverse_x:
-; CHECK: ucomisd %xmm
+; CHECK: cmplesd %xmm
; UNSAFE-LABEL: ole_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
}
; CHECK-LABEL: ugt:
-; CHECK: ucomisd %xmm0, %xmm1
+; CHECK: cmpnlesd %xmm1
; UNSAFE-LABEL: ugt:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ult:
-; CHECK: ucomisd %xmm1, %xmm0
+; CHECK: cmpnlesd %xmm0
; UNSAFE-LABEL: ult:
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ugt_inverse:
-; CHECK: ucomisd %xmm0, %xmm1
+; CHECK: cmpnlesd %xmm1
; UNSAFE-LABEL: ugt_inverse:
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ult_inverse:
-; CHECK: ucomisd %xmm1, %xmm0
+; CHECK: cmpnlesd %xmm0
; UNSAFE-LABEL: ult_inverse:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ugt_x:
-; CHECK: ucomisd %xmm0, %xmm1
+; CHECK: cmpnlesd %xmm
+; CHECK-NEXT: andpd
; UNSAFE-LABEL: ugt_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
}
; CHECK-LABEL: ult_x:
-; CHECK: ucomisd %xmm1, %xmm0
+; CHECK: cmpnlesd %xmm
+; CHECK-NEXT: andpd
; UNSAFE-LABEL: ult_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
}
; CHECK-LABEL: ugt_inverse_x:
-; CHECK: ucomisd %xmm
+; CHECK: cmpnlesd %xmm
+; CHECK-NEXT: andnpd
; UNSAFE-LABEL: ugt_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
}
; CHECK-LABEL: ult_inverse_x:
-; CHECK: ucomisd %xmm
+; CHECK: cmpnlesd %xmm
+; CHECK-NEXT: andnpd
; UNSAFE-LABEL: ult_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
}
; CHECK-LABEL: oge_y:
-; CHECK: ucomisd %xmm1, %xmm0
+; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ole_y:
-; CHECK: ucomisd %xmm0, %xmm1
+; CHECK: cmplesd %xmm
; UNSAFE-LABEL: ole_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: oge_inverse_y:
-; CHECK: ucomisd %xmm
+; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge_inverse_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ole_inverse_y:
-; CHECK: ucomisd %xmm
+; CHECK: cmplesd %xmm
; UNSAFE-LABEL: ole_inverse_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ugt_y:
-; CHECK: ucomisd %xmm0, %xmm1
+; CHECK: cmpnlesd %xmm
; UNSAFE-LABEL: ugt_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ult_y:
-; CHECK: ucomisd %xmm1, %xmm0
+; CHECK: cmpnlesd %xmm0
; UNSAFE-LABEL: ult_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ugt_inverse_y:
-; CHECK: ucomisd %xmm
+; CHECK: cmpnlesd %xmm
; UNSAFE-LABEL: ugt_inverse_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
}
; CHECK-LABEL: ult_inverse_y:
-; CHECK: ucomisd %xmm
+; CHECK: cmpnlesd %xmm
; UNSAFE-LABEL: ult_inverse_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret