case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
+                                             SDValue &Hi) {
+  // The result (and hence the magnitude operand #0) is being split, so
+  // operand #0 is guaranteed to already have the split-vector action.
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  SDLoc DL(N);
+
+  // The sign operand #1 may have a different vector type than the result.
+  // If the legalizer is splitting it too, reuse its halves; otherwise split
+  // it manually here (it is still a vector of the same element count).
+  SDValue RHSLo, RHSHi;
+  SDValue RHS = N->getOperand(1);
+  EVT RHSVT = RHS.getValueType();
+  if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+    GetSplitVector(RHS, RHSLo, RHSHi);
+  else
+    std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+
+  // Recreate FCOPYSIGN on each half; per-half result types come from the
+  // split magnitude halves.
+  Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
+  Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
+  // The result (and the first input) has a legal vector type, but the second
+  // input needs splitting.
+  // Instead of producing split sub-ops, unroll into per-element scalar
+  // FCOPYSIGNs; the element extracts off the second input get legalized on
+  // their own later.
+  return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+}
//===----------------------------------------------------------------------===//
break;
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+  // If this is an FCOPYSIGN with same input types, we can treat it as a
+  // normal (can trap) binary op.
+  if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+    return WidenVecRes_BinaryCanTrap(N);
+
+  // If the types are different, fall back to unrolling.
+  // NOTE(review): the unroll count is taken from the *widened* result type,
+  // not the original element count — confirm UnrollVectorOp pads the extra
+  // lanes as intended.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
}
}
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+  // Handle an FCOPYSIGN whose sign operand (#1) has a type that is being
+  // widened while the result type is already legal.
+  // The result (and first input) is legal, but the second input is illegal.
+  // We can't do much to fix that, so just unroll and let the extracts off of
+  // the second input be widened as needed later.
+  return DAG.UnrollVectorOp(N);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type so unroll the convert
--- /dev/null
+; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+;============ v1f32
+
+; WidenVecRes same
+; (matching operand types, so WidenVecRes_FCOPYSIGN takes the BinaryCanTrap
+; route after the v1f32 result is widened)
+define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f32_v1f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ret
+  %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
+  ret <1 x float> %r
+}
+
+; WidenVecRes mismatched
+; (the fptrunc of the sign operand is presumably folded into the copysign,
+; leaving mismatched operand types — the unroll path of WidenVecRes_FCOPYSIGN;
+; verify against the legalizer)
+define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f32_v1f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+  %tmp0 = fptrunc <1 x double> %b to <1 x float>
+  %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
+  ret <1 x float> %r
+}
+
+declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0
+
+;============ v1f64
+
+; WidenVecOp #1
+; (the sign operand ends up with an illegal vector type while the result is
+; legal — presumably hits WidenVecOp_FCOPYSIGN's unroll; confirm)
+define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f64_v1f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: fcvt d1, s1
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+  %tmp0 = fpext <1 x float> %b to <1 x double>
+  %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
+  ret <1 x double> %r
+}
+
+; Same-type case: plain v1f64 fcopysign.
+define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f64_v1f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+  %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %r
+}
+
+declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
+
+;============ v2f32
+
+; Matching v2f32 operand types.
+define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ret
+  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
+  ret <2 x float> %r
+}
+
+; Sign operand comes from an fptrunc of v2f64.
+define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: fcvt s2, d2
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ret
+  %tmp0 = fptrunc <2 x double> %b to <2 x float>
+  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
+  ret <2 x float> %r
+}
+
+declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
+
+;============ v4f32
+
+; Matching v4f32 operand types.
+define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: mov s5, v0[2]
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: mov s2, v0[3]
+; CHECK-NEXT: mov s6, v1[2]
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: bit.16b v5, v6, v4
+; CHECK-NEXT: mov s1, v1[3]
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ins.s v0[2], v5[0]
+; CHECK-NEXT: bit.16b v2, v1, v4
+; CHECK-NEXT: ins.s v0[3], v2[0]
+; CHECK-NEXT: ret
+  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %r
+}
+
+; SplitVecOp #1
+; (the v4f64 sign operand needs splitting while result/operand #0 are legal,
+; so SplitVecOp_FCOPYSIGN unrolls the node into scalar copysigns)
+define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: mov d4, v1[1]
+; CHECK-NEXT: movi.4s v5, #0x80, lsl #24
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: mov s6, v0[2]
+; CHECK-NEXT: mov s7, v0[3]
+; CHECK-NEXT: fcvt s16, d2
+; CHECK-NEXT: bit.16b v0, v1, v5
+; CHECK-NEXT: bit.16b v6, v16, v5
+; CHECK-NEXT: fcvt s1, d4
+; CHECK-NEXT: bit.16b v3, v1, v5
+; CHECK-NEXT: mov d1, v2[1]
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ins.s v0[2], v6[0]
+; CHECK-NEXT: bit.16b v7, v1, v5
+; CHECK-NEXT: ins.s v0[3], v7[0]
+; CHECK-NEXT: ret
+  %tmp0 = fptrunc <4 x double> %b to <4 x float>
+  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
+  ret <4 x float> %r
+}
+
+declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
+
+;============ v2f64
+
+; Sign operand is fpext'd from v2f32 before the copysign.
+; (function renamed from the typo'd @test_copysign_v2f64_v232 to follow the
+; test_copysign_<res>_<sign> convention used by every other test here)
+define <2 x double> @test_copysign_v2f64_v2f32(<2 x double> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d2, v0[1]
+; CHECK-NEXT: mov s3, v1[1]
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fcvt d1, s1
+; CHECK-NEXT: fcvt d3, s3
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v2, v3, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.d v0[1], v2[0]
+; CHECK-NEXT: ret
+  %tmp0 = fpext <2 x float> %b to <2 x double>
+  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
+  ret <2 x double> %r
+}
+
+; Same-type case: plain v2f64 fcopysign.
+define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d2, v1[1]
+; CHECK-NEXT: mov d3, v0[1]
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.d v0[1], v3[0]
+; CHECK-NEXT: ret
+  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %r
+}
+
+declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
+
+;============ v4f64
+
+; SplitVecRes mismatched
+; (the fpext of the sign operand is presumably folded, leaving an fcopysign
+; with a split v4f64 result and a v4f32 sign operand — SplitVecRes_FCOPYSIGN
+; must split the sign operand manually; confirm against the legalizer)
+define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: ext.16b v3, v2, v2, #8
+; CHECK-NEXT: mov d4, v0[1]
+; CHECK-NEXT: mov s5, v2[1]
+; CHECK-NEXT: movi.2d v6, #0000000000000000
+; CHECK-NEXT: fcvt d2, s2
+; CHECK-NEXT: fcvt d5, s5
+; CHECK-NEXT: fneg.2d v6, v6
+; CHECK-NEXT: bit.16b v4, v5, v6
+; CHECK-NEXT: mov d5, v1[1]
+; CHECK-NEXT: bit.16b v0, v2, v6
+; CHECK-NEXT: mov s2, v3[1]
+; CHECK-NEXT: fcvt d3, s3
+; CHECK-NEXT: fcvt d2, s2
+; CHECK-NEXT: ins.d v0[1], v4[0]
+; CHECK-NEXT: bit.16b v5, v2, v6
+; CHECK-NEXT: bit.16b v1, v3, v6
+; CHECK-NEXT: ins.d v1[1], v5[0]
+; CHECK-NEXT: ret
+  %tmp0 = fpext <4 x float> %b to <4 x double>
+  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
+  ret <4 x double> %r
+}
+
+; SplitVecRes same
+; (both operands share the split v4f64 type, so both halves come straight
+; from GetSplitVector)
+define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d4, v2[1]
+; CHECK-NEXT: mov d5, v0[1]
+; CHECK-NEXT: movi.2d v6, #0000000000000000
+; CHECK-NEXT: fneg.2d v6, v6
+; CHECK-NEXT: bit.16b v5, v4, v6
+; CHECK-NEXT: mov d4, v3[1]
+; CHECK-NEXT: bit.16b v0, v2, v6
+; CHECK-NEXT: mov d2, v1[1]
+; CHECK-NEXT: bit.16b v2, v4, v6
+; CHECK-NEXT: bit.16b v1, v3, v6
+; CHECK-NEXT: ins.d v0[1], v5[0]
+; CHECK-NEXT: ins.d v1[1], v2[0]
+; CHECK-NEXT: ret
+  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %r
+}
+
+declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
+
+attributes #0 = { nounwind }