From f48b1beeec4f20986130566f935721e0aec3acac Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Tue, 25 Aug 2015 07:42:09 +0000 Subject: [PATCH] [X86] Fix fptoui conversions This fixes two issues in x86 fptoui lowering. 1) Makes conversions from f80 go through the right path on AVX-512. 2) Implements an inline sequence for fptoui i64 instead of a library call. This improves performance by 6X on SSE3+ and 3X otherwise. Incidentally, it also removes the use of ftol2 for fptoui, which was wrong to begin with, as ftol2 converts to a signed i64, producing wrong results for values >= 2^63. Patch by: mitch.l.bodart@intel.com Differential Revision: http://reviews.llvm.org/D11316 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245924 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 203 +++++++++++++++++++-------- lib/Target/X86/X86ISelLowering.h | 9 -- test/CodeGen/X86/pr17631.ll | 2 +- test/CodeGen/X86/scalar-fp-to-i64.ll | 135 ++++++++++++++++++ test/CodeGen/X86/win_ftol2.ll | 14 +- 5 files changed, 287 insertions(+), 76 deletions(-) create mode 100644 test/CodeGen/X86/scalar-fp-to-i64.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ae40d6abcd5..8bf7ad47bae 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -114,13 +114,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); - - // The _ftol2 runtime function has an unusual calling conv, which - // is modeled by a special pseudo-instruction. - setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr); - setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr); - setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr); - setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr); } if (Subtarget->isTargetDarwin()) { @@ -228,8 +221,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); if (Subtarget->is64Bit()) { - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { + // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + } else { + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); + } } else if (!Subtarget->useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) @@ -238,14 +237,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // the optimal thing for SSE vs. the default expansion in the legalizer. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); else + // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. // With SSE3 we can use fisttpll to convert to a signed i64; without // SSE, we're stuck with a fistpll. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - } - if (isTargetFTOL()) { - // Use the _ftol2 runtime function, which has a pseudo-instruction - // to handle its weird calling convention. setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); } @@ -1330,13 +1326,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + // FIXME: [US]INT_TO_FP are not legal for f80. setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); if (Subtarget->is64Bit()) { - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); } @@ -12334,15 +12327,45 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, DAG.getIntPtrConstant(0, dl)); } +// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation +// is legal, or has an f16 source (which needs to be promoted to f32), +// just return an pair. +// Otherwise it is assumed to be a conversion from one of f32, f64 or f80 +// to i16, i32 or i64, and we lower it to a legal sequence. +// If lowered to the final integer result we return a pair. +// Otherwise we lower it to a sequence ending with a FIST, return a +// pair, and the caller is responsible for loading +// the final integer result from StackSlot. std::pair -X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, - bool IsSigned, bool IsReplace) const { +X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, + bool IsSigned, bool IsReplace) const { SDLoc DL(Op); EVT DstTy = Op.getValueType(); + EVT TheVT = Op.getOperand(0).getValueType(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - if (!IsSigned && !isIntegerTypeFTOL(DstTy)) { + if (TheVT == MVT::f16) + // We need to promote the f16 to f32 before using the lowering + // in this routine. + return std::make_pair(SDValue(), SDValue()); + + assert((TheVT == MVT::f32 || + TheVT == MVT::f64 || + TheVT == MVT::f80) && + "Unexpected FP operand type in FP_TO_INTHelper"); + + // If using FIST to compute an unsigned i64, we'll need some fixup + // to handle values above the maximum signed i64. A FIST is always + // used for the 32-bit subtarget, but also for f80 on a 64-bit target. + bool UnsignedFixup = !IsSigned && + DstTy == MVT::i64 && + (!Subtarget->is64Bit() || + !isScalarFPTypeInSSEReg(TheVT)); + + if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) { + // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST. + // The low 32 bits of the fist result will have the correct uint32 result. assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); DstTy = MVT::i64; } @@ -12360,27 +12383,72 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); - // We lower FP->int64 either into FISTP64 followed by a load from a temporary - // stack slot, or into the FTOL runtime function. + // We lower FP->int64 into FISTP64 followed by a load from a temporary + // stack slot. MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); unsigned Opc; - if (!IsSigned && isIntegerTypeFTOL(DstTy)) - Opc = X86ISD::WIN_FTOL; - else - switch (DstTy.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); - case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; - case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; - case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; - } + switch (DstTy.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); + case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; + case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; + case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; + } SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); - EVT TheVT = Op.getOperand(0).getValueType(); + SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment. + + if (UnsignedFixup) { + // + // Conversion to unsigned i64 is implemented with a select, + // depending on whether the source value fits in the range + // of a signed i64. Let Thresh be the FP equivalent of + // 0x8000000000000000ULL. + // + // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000; + // FistSrc = (Value < Thresh) ? Value : (Value - Thresh); + // Fist-to-mem64 FistSrc + // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent + // to XOR'ing the high 32 bits with Adjust. + // + // Being a power of 2, Thresh is exactly representable in all FP formats. + // For X87 we'd like to use the smallest FP type for this constant, but + // for DAG type consistency we have to match the FP operand type. + + APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000)); + APFloat::opStatus Status = APFloat::opOK; + bool LosesInfo = false; + if (TheVT == MVT::f64) + // The rounding mode is irrelevant as the conversion should be exact. + Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &LosesInfo); + else if (TheVT == MVT::f80) + Status = Thresh.convert(APFloat::x87DoubleExtended, + APFloat::rmNearestTiesToEven, &LosesInfo); + + assert(Status == APFloat::opOK && !LosesInfo && + "FP conversion should have been exact"); + + SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); + + SDValue Cmp = DAG.getSetCC(DL, + getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Adjust = DAG.getSelect(DL, MVT::i32, Cmp, + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0x80000000, DL, MVT::i32)); + SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); + Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); + } + // FIXME This causes a redundant load/store if the SSE-class value is already // in memory, such as if it is on the callstack. if (isScalarFPTypeInSSEReg(TheVT)) { @@ -12406,25 +12474,49 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOStore, MemSize, MemSize); - if (Opc != X86ISD::WIN_FTOL) { + if (UnsignedFixup) { + + // Insert the FIST, load its result as two i32's, + // and XOR the high i32 with Adjust. + + SDValue FistOps[] = { Chain, Value, StackSlot }; + SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), + FistOps, DstTy, MMO); + + SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot, + MachinePointerInfo(), + false, false, false, 0); + SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot, + DAG.getConstant(4, DL, PtrVT)); + + SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr, + MachinePointerInfo(), + false, false, false, 0); + High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust); + + if (Subtarget->is64Bit()) { + // Join High32 and Low32 into a 64-bit result. + // (High32 << 32) | Low32 + Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32); + High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32); + High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32, + DAG.getConstant(32, DL, MVT::i8)); + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32); + return std::make_pair(Result, SDValue()); + } + + SDValue ResultOps[] = { Low32, High32 }; + + SDValue pair = IsReplace + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps) + : DAG.getMergeValues(ResultOps, DL); + return std::make_pair(pair, SDValue()); + } else { // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO); return std::make_pair(FIST, StackSlot); - } else { - SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, - DAG.getVTList(MVT::Other, MVT::Glue), - Chain, Value); - SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX, - MVT::i32, ftol.getValue(1)); - SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX, - MVT::i32, eax.getValue(2)); - SDValue Ops[] = { eax, edx }; - SDValue pair = IsReplace - ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops) - : DAG.getMergeValues(Ops, DL); - return std::make_pair(pair, SDValue()); } } @@ -12688,7 +12780,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, /*IsSigned=*/ true, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. - if (!FIST.getNode()) return Op; + if (!FIST.getNode()) + return Op; if (StackSlot.getNode()) // Load the result. @@ -12705,7 +12798,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, std::pair Vals = FP_TO_INTHelper(Op, DAG, /*IsSigned=*/ false, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; - assert(FIST.getNode() && "Unexpected failure"); + // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. + if (!FIST.getNode()) + return Op; if (StackSlot.getNode()) // Load the result. @@ -18885,17 +18980,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: - // FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert - // (FP_TO_SINT (load f16)) to FP_TO_INT*. - if (N->getOperand(0).getValueType() == MVT::f16) - break; - // fallthrough case ISD::FP_TO_UINT: { bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; - if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType())) - return; - std::pair Vals = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true); SDValue FIST = Vals.first, StackSlot = Vals.second; @@ -26507,10 +26594,6 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL, return -1; } -bool X86TargetLowering::isTargetFTOL() const { - return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); -} - bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { // Integer division on x86 is expensive. However, when aggressively optimizing // for code size, we prefer to use a div instruction, as it is usually smaller diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 026c4d0cbda..d3de850b3d6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -856,15 +856,6 @@ namespace llvm { (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } - /// Return true if the target uses the MSVC _ftol2 routine for fptoui. - bool isTargetFTOL() const; - - /// Return true if the MSVC _ftol2 routine should be used for fptoui to the - /// given type. - bool isIntegerTypeFTOL(EVT VT) const { - return isTargetFTOL() && VT == MVT::i64; - } - /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool shouldConvertConstantLoadToIntImm(const APInt &Imm, diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll index 98f951f1b10..08c393d29d6 100644 --- a/test/CodeGen/X86/pr17631.ll +++ b/test/CodeGen/X86/pr17631.ll @@ -30,5 +30,5 @@ define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) { ; CHECK: foo ; CHECK-NOT: vzeroupper -; CHECK: _ftol2 +; CHECK: {{cvtt|fist}} ; CHECK: ret diff --git a/test/CodeGen/X86/scalar-fp-to-i64.ll b/test/CodeGen/X86/scalar-fp-to-i64.ll new file mode 100644 index 00000000000..b53938928ee --- /dev/null +++ b/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -0,0 +1,135 @@ +; Check that scalar FP conversions to signed and unsigned int64 are using +; reasonable sequences, across platforms and target switches. +; +; The signed case is straight forward, and the tests here basically +; ensure successful compilation (f80 with avx512 was broken at one point). +; +; For the unsigned case there are many possible sequences, so to avoid +; a fragile test we just check for the presence of a few key instructions. +; AVX512 on Intel64 can use vcvtts[ds]2usi directly for float and double. +; Otherwise the sequence will involve an FP subtract (fsub, subss or subsd), +; and a truncating conversion (cvtts[ds]2si, fisttp, or fnstcw+fist). When +; both a subtract and fnstcw are needed, they can occur in either order. +; +; The interesting subtargets are AVX512F (vcvtts[ds]2usi), SSE3 (fisttp), +; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only). +; +; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32 +; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64 +; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32 +; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64 +; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32 +; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64 +; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87 + +; CHECK-LABEL: f_to_u64 +; X87-DAG: fsub +; X87-DAG: fnstcw +; X87: fist +; SSE2_32-DAG: {{subss|fsub}} +; SSE2_32-DAG: fnstcw +; SSE2_32: fist +; SSE2_64: subss +; SSE2_64: cvttss2si +; SSE3_32: {{subss|fsub}} +; SSE3_32: fistt +; SSE3_64: subss +; SSE3_64: cvttss2si +; AVX512_32: {{subss|fsub}} +; AVX512_32: fistt +; AVX512_64: vcvttss2usi +; CHECK: ret +define i64 @f_to_u64(float %a) nounwind { + %r = fptoui float %a to i64 + ret i64 %r +} + +; CHECK-LABEL: f_to_s64 +; X87: fnstcw +; X87: fist +; SSE2_32: fnstcw +; SSE2_32: fist +; SSE2_64: cvttss2si +; SSE3_32: fistt +; SSE3_64: cvttss2si +; AVX512_32: fistt +; AVX512_64: vcvttss2si +; CHECK: ret +define i64 @f_to_s64(float %a) nounwind { + %r = fptosi float %a to i64 + ret i64 %r +} + +; CHECK-LABEL: d_to_u64 +; X87-DAG: fsub +; X87-DAG: fnstcw +; X87: fist +; SSE2_32-DAG: {{subsd|fsub}} +; SSE2_32-DAG: fnstcw +; SSE2_32: fist +; SSE2_64: subsd +; SSE2_64: cvttsd2si +; SSE3_32: {{subsd|fsub}} +; SSE3_32: fistt +; SSE3_64: subsd +; SSE3_64: cvttsd2si +; AVX512_32: {{subsd|fsub}} +; AVX512_32: fistt +; AVX512_64: vcvttsd2usi +; CHECK: ret +define i64 @d_to_u64(double %a) nounwind { + %r = fptoui double %a to i64 + ret i64 %r +} + +; CHECK-LABEL: d_to_s64 +; X87: fnstcw +; X87: fist +; SSE2_32: fnstcw +; SSE2_32: fist +; SSE2_64: cvttsd2si +; SSE3_32: fistt +; SSE3_64: cvttsd2si +; AVX512_32: fistt +; AVX512_64: vcvttsd2si +; CHECK: ret +define i64 @d_to_s64(double %a) nounwind { + %r = fptosi double %a to i64 + ret i64 %r +} + +; CHECK-LABEL: x_to_u64 +; CHECK-DAG: fsub +; X87-DAG: fnstcw +; SSE2_32-DAG: fnstcw +; SSE2_64-DAG: fnstcw +; CHECK: fist +; CHECK: ret +define i64 @x_to_u64(x86_fp80 %a) nounwind { + %r = fptoui x86_fp80 %a to i64 + ret i64 %r +} + +; CHECK-LABEL: x_to_s64 +; X87: fnstcw +; X87: fist +; SSE2_32: fnstcw +; SSE2_32: fist +; SSE2_64: fnstcw +; SSE2_64: fist +; SSE3_32: fistt +; SSE3_64: fistt +; AVX512_32: fistt +; AVX512_64: fistt +; CHECK: ret +define i64 @x_to_s64(x86_fp80 %a) nounwind { + %r = fptosi x86_fp80 %a to i64 + ret i64 %r +} diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll index dfa6e3aa76b..001f54237fe 100644 --- a/test/CodeGen/X86/win_ftol2.ll +++ b/test/CodeGen/X86/win_ftol2.ll @@ -1,15 +1,17 @@ -; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL +; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=COMPILERRT ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT ; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT ; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT ; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT ; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT -; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2 +; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=COMPILERRT -; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This -; function has a nonstandard calling convention: the input value is expected on -; the x87 stack instead of the callstack. The input value is popped by the -; callee. Mingw32 uses normal cdecl compiler-rt functions. +; This test originally used the FTOL and FTOL_2 checks for the i686-pc-win32 +; triples, under the assumption that Win32 targets should use the MSVCRT +; _ftol2 runtime function for fptoui to i64. That usage was incorrect, +; as _ftol2 performs conversion to signed i64. As of the compiler fix, +; the FTOL/FTOL_2 checks are no longer used, which basically renders +; this test meaningless. define i64 @double_ui64(double %x) nounwind { entry: -- 2.34.1