// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
- }
+ } else if (!Subtarget->is64Bit())
+ setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
return Op;
}
+ SDValue ValueToStore = Op.getOperand(0);
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
- DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
+ DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ SDValue ValueToStore = Op.getOperand(0);
+ if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore,
StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
- if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
+ if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
+ SrcVT == MVT::i64) {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
- SDValue InVec = Op->getOperand(0);
- SDLoc dl(Op);
- unsigned NumElts = SrcVT.getVectorNumElements();
- MVT SVT = SrcVT.getVectorElementType();
-
- // Widen the vector in input in the case of MVT::v2i32.
- // Example: from MVT::v2i32 to MVT::v4i32.
+ SDValue Op0 = Op->getOperand(0);
SmallVector<SDValue, 16> Elts;
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
- DAG.getIntPtrConstant(i, dl)));
-
+ SDLoc dl(Op);
+ unsigned NumElts;
+ MVT SVT;
+ if (SrcVT.isVector()) {
+ NumElts = SrcVT.getVectorNumElements();
+ SVT = SrcVT.getVectorElementType();
+
+ // Widen the vector in input in the case of MVT::v2i32.
+ // Example: from MVT::v2i32 to MVT::v4i32.
+ for (unsigned i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
+ DAG.getIntPtrConstant(i, dl)));
+ } else {
+ assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
+ "Unexpected source type in LowerBITCAST");
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(0, dl)));
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(1, dl)));
+ NumElts = 2;
+ SVT = MVT::i32;
+ }
// Explicitly mark the extra elements as Undef.
Elts.append(NumElts, DAG.getUNDEF(SVT));
}
; CHECK-LABEL: u64_to_f
+; AVX512_32: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll
+
; AVX512_64: vcvtusi2ssq
+
+; SSE2_32: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2_32: movq %xmm0, {{[0-9]+}}(%esp)
; SSE2_32: fildll
+
; SSE2_64: cvtsi2ssq
; X87: fildll
define float @u64_to_f(i64 %a) nounwind {
ret float %r
}
+; CHECK-LABEL: s64_to_f_2
+; SSE2_32: movd %ecx, %xmm0
+; SSE2_32: movd %eax, %xmm1
+; SSE2_32: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
+; SSE2_32: fildll {{[0-9]+}}(%esp)
+
+; AVX512_32: vmovd %eax, %xmm0
+; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: fildll {{[0-9]+}}(%esp)
+
+define float @s64_to_f_2(i64 %a) nounwind { ; signed i64 to float conversion of a computed value
+ %a1 = add i64 %a, 5 ; converted operand is an arithmetic result, not the incoming argument itself (presumably so it is not simply reloaded from the argument slot - confirm)
+ %r = sitofp i64 %a1 to float ; the signed conversion under test
+ ret float %r
+}
+
; CHECK-LABEL: u64_to_d
; AVX512_32: vpunpckldq
; AVX512_64: vcvtusi2sdq
ret double %r
}
+; CHECK-LABEL: s64_to_d_2
+; SSE2_32: movd %ecx, %xmm0
+; SSE2_32: movd %eax, %xmm1
+; SSE2_32: punpckldq %xmm0, %xmm1
+; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
+; SSE2_32: fildll
+
+; AVX512_32: vmovd %eax, %xmm0
+; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: fildll
+
+define double @s64_to_d_2(i64 %a) nounwind { ; signed i64 to double conversion of a computed value
+ %b = add i64 %a, 5 ; converted operand is an arithmetic result, not the incoming argument itself (presumably so it is not simply reloaded from the argument slot - confirm)
+ %f = sitofp i64 %b to double ; the signed conversion under test
+ ret double %f
+}
+
; CHECK-LABEL: u64_to_x
; CHECK: fildll
define x86_fp80 @u64_to_x(i64 %a) nounwind {